Thesis Benchmark Implementation: Robust Deep Learning¶
This notebook implements the baseline algorithms for comparison against the proposed algorithm.
The goal is to reproduce the performance of standard and existing robust methods on datasets with known noise.
1. Imports¶
We'll begin by importing the necessary libraries from PyTorch, TorchVision, and NumPy.
# --- 1. Imports ---
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm.notebook import tqdm # For progress bars
# Set a random seed for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
# --- Updated Device Setup ---
if torch.cuda.is_available():
device = torch.device("cuda")
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
elif torch.backends.mps.is_available():
# This is for Apple's M-series chips
device = torch.device("mps")
else:
device = torch.device("cpu")
print(f"Using device: {device}")
Using device: mps
# --- Mount Drive (Colab) or Define Local Path (Local) ---
import os
# Check if we are in Google Colab
try:
import google.colab
IS_COLAB = True
except ImportError:
IS_COLAB = False
if IS_COLAB:
print("Running on Google Colab. Mounting Drive...")
from google.colab import drive
drive.mount('/content/drive')
# Use the Google Drive path
checkpoint_dir = '/content/drive/MyDrive/ThesisCheckpoints'
else:
print("Running locally. Using local checkpoint directory.")
# Use a local folder named 'ThesisCheckpoints' in your notebook's directory
checkpoint_dir = './ThesisCheckpoints'
# --- This part is the same for both ---
# Ensure the directory exists
if not os.path.exists(checkpoint_dir):
os.makedirs(checkpoint_dir)
print(f"Created directory: {checkpoint_dir}")
else:
print(f"Checkpoint directory already exists: {checkpoint_dir}")
Running locally. Using local checkpoint directory.
Checkpoint directory already exists: ./ThesisCheckpoints
## 1.1 Data Setup: Download Cloud Dataset (Task 2)
# PURPOSE: Downloads and unzips the dataset. Requires kaggle.json in the file system.
import os
def download_cloud_data():
dataset_name = "zeesolver/cloiud-dataset"
target_dir = "./data/task_2_clouds"
# Check if data already exists to avoid re-downloading
if os.path.exists(target_dir) and len(os.listdir(target_dir)) > 0:
print(f"✅ Dataset already found at {target_dir}")
return
print(f"⬇️ Downloading {dataset_name}...")
# Install Kaggle client if missing
os.system("pip install -q kaggle")
# Set config dir to current directory if kaggle.json is here
if os.path.exists("kaggle.json"):
os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()
# Download and unzip
# -d: dataset, -p: path, --unzip: automatic unzip
ret = os.system(f"kaggle datasets download -d {dataset_name} -p {target_dir} --unzip")
if ret == 0:
print("✅ Download and unzip complete.")
else:
print("❌ Download failed. Make sure 'kaggle.json' is uploaded to the runtime.")
download_cloud_data()
⬇️ Downloading zeesolver/cloiud-dataset...
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /Users/selim/Desktop/**WS2526/Thesis/Implementation Trials/kaggle.json'
Dataset URL: https://www.kaggle.com/datasets/zeesolver/cloiud-dataset
License(s): apache-2.0
Downloading cloiud-dataset.zip to ./data/task_2_clouds
100%|██████████| 5.10M/5.10M [00:00<00:00, 1.01GB/s]
✅ Download and unzip complete.
## 1.2 Analysis: Calculate Signal Power (E[x^2])
# PURPOSE: Measures the strength of the clean signal so we can calibrate the noise.
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
def get_signal_power(data_path, img_size=64):
print(f"📊 Analyzing signal power in: {data_path}")
# Transform: Resize to 64x64 and convert to Tensor (0-1)
# We do NOT normalize (subtract mean) here because we need raw energy.
transform = transforms.Compose([
transforms.Resize((img_size, img_size)),
transforms.ToTensor()
])
try:
# Load dataset
ds = ImageFolder(root=data_path, transform=transform)
dl = DataLoader(ds, batch_size=64, shuffle=False)
squared_sum = 0.0
count = 0
for imgs, _ in dl:
# imgs: [batch, 3, 64, 64]
squared_sum += torch.sum(imgs ** 2).item()
count += imgs.numel()
power = squared_sum / count
print(f"✅ Signal Power (E[x^2]): {power:.5f}")
return power
except Exception as e:
print(f"⚠️ Could not load dataset: {e}")
return 0.5 # Default fallback
# Calculate
dataset_path = "./data/task_2_clouds"
signal_power = get_signal_power(dataset_path)
## 1.3 Analysis: Calibrate Gaussian Noise Variance
# PURPOSE: Decides the noise std to ensure the task is "Hard" (SNR < 0.5).
def calibrate_noise(signal_pow):
print("\n⚖️ Calibrating Noise Levels...")
stds = [0.1, 0.3, 0.5, 0.8, 1.0, 1.2]
    recommended_std = None  # will be set to the first std in the HARD range
print(f"{'STD':<6} | {'NoiseVar':<8} | {'SNR':<8} | {'Difficulty'}")
print("-" * 45)
for s in stds:
noise_var = s ** 2
snr = signal_pow / noise_var
diff = "EASY"
if snr < 0.2: diff = "CRITICAL"
        elif snr < 0.5:
            diff = "HARD (Target)"
            if recommended_std is None:
                recommended_std = s  # pick the first std that makes the task HARD
elif snr < 1.0: diff = "MEDIUM"
print(f"{s:<6.1f} | {noise_var:<8.2f} | {snr:<8.2f} | {diff}")
print(f"\n🎯 Recommended Noise Std for Task 2: {recommended_std}")
return recommended_std
selected_noise_std = calibrate_noise(signal_power)
📊 Analyzing signal power in: ./data/task_2_clouds
✅ Signal Power (E[x^2]): 0.33258

⚖️ Calibrating Noise Levels...
STD    | NoiseVar | SNR      | Difficulty
---------------------------------------------
0.1    | 0.01     | 33.26    | EASY
0.3    | 0.09     | 3.70     | EASY
0.5    | 0.25     | 1.33     | EASY
0.8    | 0.64     | 0.52     | MEDIUM
1.0    | 1.00     | 0.33     | HARD (Target)
1.2    | 1.44     | 0.23     | HARD (Target)

🎯 Recommended Noise Std for Task 2: 1.0
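As a worked check of the recommendation: with the measured signal power $E[x^2] \approx 0.333$ and noise std $\sigma = 1.0$, the signal-to-noise ratio is $\text{SNR} = E[x^2]/\sigma^2 = 0.333/1.0 \approx 0.33$, which falls below the 0.5 threshold into the HARD band; hence the recommended std of 1.0.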
2. Baseline 1: Uniform SGD¶
This is the standard, non-robust training procedure. It serves as our main baseline.
- Method: Standard Stochastic Gradient Descent.
- Sampling: Samples are drawn uniformly at random from the entire training dataset in each epoch (as handled by the default `DataLoader` with shuffling enabled).
# --- 2. Baseline 1: Uniform SGD ---
def train_uniform_sgd(model, train_loader, criterion, optimizer, device):
"""
Standard training loop for one epoch using Uniform SGD.
    This is the non-robust baseline.
"""
model.train() # Set the model to training mode
running_loss = 0.0
correct_samples = 0
total_samples = 0
# The default train_loader already implements uniform random sampling
# (with shuffling enabled)
for inputs, labels in train_loader:
# Move data to the configured device
inputs, labels = inputs.to(device), labels.to(device)
# 1. Zero the parameter gradients
optimizer.zero_grad()
# 2. Forward pass
outputs = model(inputs)
# 3. Calculate the loss
        # Uniform SGD uses the standard 'mean' reduction, so the criterion
        # passed in must be constructed with reduction='mean'.
loss = criterion(outputs, labels)
# 4. Backward pass
loss.backward()
# 5. Optimize
optimizer.step()
# --- Statistics ---
running_loss += loss.item() * inputs.size(0)
# Calculate accuracy
_, predicted = torch.max(outputs.data, 1)
total_samples += labels.size(0)
correct_samples += (predicted == labels).sum().item()
epoch_loss = running_loss / total_samples
epoch_acc = correct_samples / total_samples
return epoch_loss, epoch_acc
3. Baseline 2: Min-k Loss SGD (MKL-SGD)¶
This is the first robust baseline, based on the paper "Choosing the Sample with Lowest Loss makes SGD Robust".
- Method: This algorithm is a simple variant of SGD. The core idea is that noisy samples or outliers will often have a high loss.
- Algorithm (Theoretical): The paper's Algorithm 1 defines the process:
  - Choose a set $S_t$ of $k$ samples.
  - Find the single sample $i_t$ in that set with the minimum loss: $i_t = \arg \min_{i \in S_t} f_i(w_t)$.
  - Perform a standard SGD update using only that one sample: $w_{t+1} = w_t - \eta \nabla f_{i_t}(w_t)$.
- Algorithm (Practical Batched Variant): For deep learning, updating on a single sample is inefficient. The paper describes a "more practical batched variant" for its neural network experiments; we implement this version (a toy sketch of the selection step follows this list):
  - Load a mini-batch of size $b$.
  - Calculate the per-sample loss for all $b$ samples.
  - Instead of picking just one sample, select the $m$ samples with the lowest loss. Following the paper's experiments (e.g., for $k=2$ the algorithm picks $b/2$ samples), we use a parameter `k_ratio` to define this: $m = \lfloor b / \text{k\_ratio} \rfloor$.
  - The final loss to be backpropagated is the mean over these $m$ selected low-loss samples.
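To make the selection step concrete before the full training loop, here is a toy sketch (the loss values below are made up purely for illustration):

# --- Toy example: MKL-SGD selection step (illustrative values) ---
import torch

per_sample_loss = torch.tensor([2.3, 0.4, 1.1, 5.0, 0.2, 0.9, 3.2, 0.7])  # batch of b=8
k_ratio = 2.0
m = max(1, int(per_sample_loss.numel() / k_ratio))  # m = b / k_ratio = 4

sorted_loss, _ = torch.sort(per_sample_loss)  # ascending: lowest loss first
selected = sorted_loss[:m]                    # the m easiest samples
print(selected)         # tensor([0.2000, 0.4000, 0.7000, 0.9000])
print(selected.mean())  # the loss that would be backpropagated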
# --- 3. Baseline 2: MKL-SGD ---
def train_mkl_sgd(model, train_loader, criterion_nored, optimizer, device, k_ratio=2.0):
"""
Training loop for one epoch using Min-k Loss SGD (MKL-SGD).
Based on the "practical batched variant" from the paper[cite: 355].
Args:
model: The neural network.
train_loader: DataLoader for training data.
criterion_nored: Loss function (e.g., CrossEntropyLoss)
initialized with `reduction='none'`.
optimizer: The optimizer (e.g., SGD, Adam).
device: The device to run on (cpu or cuda).
k_ratio: The denominator for sample selection (e.g., 2.0 means b/2 samples).
"""
model.train()
running_selected_loss = 0.0
correct_samples = 0
total_samples = 0
total_selected_samples = 0
for inputs, labels in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
batch_size = inputs.size(0)
# Determine number of samples to select (m = b/k)
# Ensure we select at least one sample
num_to_select = int(batch_size / k_ratio)
if num_to_select == 0:
num_to_select = 1
# 1. Zero the parameter gradients
optimizer.zero_grad()
# 2. Forward pass
outputs = model(inputs)
# 3. Calculate per-sample loss (reduction='none' is required)
per_sample_loss = criterion_nored(outputs, labels)
# 4. Select the m = b/k samples with the lowest loss
        sorted_loss, _ = torch.sort(per_sample_loss)  # ascending; the index output is unused
selected_loss = sorted_loss[:num_to_select]
# 5. Calculate the mean loss *only* for the selected samples
mean_selected_loss = selected_loss.mean()
# 6. Backward pass and optimize on the selected mean loss
mean_selected_loss.backward()
optimizer.step()
# --- Statistics ---
# We track the loss of the selected samples
running_selected_loss += selected_loss.sum().item()
total_selected_samples += num_to_select
# Accuracy is calculated on the entire batch for a fair comparison
_, predicted = torch.max(outputs.data, 1)
total_samples += labels.size(0)
correct_samples += (predicted == labels).sum().item()
# Average loss over all *selected* samples
epoch_loss = running_selected_loss / total_selected_samples
# Average accuracy over *all* processed samples
epoch_acc = correct_samples / total_samples
return epoch_loss, epoch_acc
4. Baseline 3: Reducible Holdout Loss (RHO-LOSS)¶
This is the second robust baseline, based on the paper "Prioritized Training on Points that are learnable, Worth Learning, and Not Yet Learnt".
Motivation: The paper argues that:
- Selecting "hard" points (high loss), like MKL-SGD, often backfires because these points are frequently noisy (unlearnable) or outliers (less relevant).
- Selecting "easy" points (curriculum learning) is inefficient as it wastes compute on redundant samples that the model has already learned.
Goal: RHO-LOSS aims to select points that are learnable (not noisy), worth learning (not outliers), and not yet learnt (not redundant).
Method (Algorithm 1): The algorithm uses two models:
- The main Target Model (the one we are training).
- A separate Irreducible Loss (IL) Model.
Algorithm Steps:
Phase 1: Pre-Computation (Done Once)
- A holdout set $\mathcal{D}_{ho}$ is set aside.
- The IL Model is trained only on this holdout set $\mathcal{D}_{ho}$.
- We then perform a single forward pass of the entire training dataset through the (frozen) IL Model to compute the loss for every training sample $(x_i, y_i)$.
- This loss, $L[y_i|x_i; \mathcal{D}_{ho}]$, is called the Irreducible Loss (IL). It represents the "unlearnable" part of the sample (e.g., noise). These values are stored in an array.
Phase 2: Main Training Loop
- At each step, a large "candidate" batch $B_t$ (size $n_B$) is loaded.
- The Target Model calculates the current training loss for each sample in $B_t$: $L[y_i|x_i; \mathcal{D}_{t}]$.
- The RHO-LOSS score is computed for each sample: $\text{RHO-LOSS}[i] = \text{CurrentLoss}[i] - \text{IL}[i]$.
- The algorithm selects the $n_b$ samples with the highest RHO-LOSS scores.
- A gradient step is performed using the mean of the current loss (not the RHO-LOSS) of these $n_b$ selected samples (a toy sketch follows this list).
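Before the implementation, a toy sketch of the scoring and selection step (all values are made up for illustration):

# --- Toy example: RHO-LOSS scoring and selection (illustrative values) ---
import torch

current_loss = torch.tensor([2.5, 0.3, 1.8, 4.0])  # target-model losses
il_loss = torch.tensor([2.4, 0.1, 0.2, 3.9])       # pre-computed irreducible losses

rho = current_loss - il_loss                        # tensor([0.1, 0.2, 1.6, 0.1])
n_b = 2
_, top_idx = torch.topk(rho, n_b)                   # samples 2 and 1 are selected
step_loss = current_loss[top_idx].mean()            # gradient uses the *current* loss
print(top_idx, step_loss)                           # tensor([2, 1]) tensor(1.0500)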
# --- 4. Baseline 3: RHO-LOSS (Phase 1: IL Pre-computation) ---
def compute_irreducible_loss(il_model, train_dataset, criterion_nored, device, batch_size=128):
"""
Computes the Irreducible Loss (IL) for every sample in the train_dataset
using the pre-trained il_model.
Args:
il_model: The pre-trained Irreducible Loss model.
train_dataset: The *entire* training dataset object.
criterion_nored: Loss function (reduction='none').
device: CPU or CUDA.
batch_size: Batch size for this one-time forward pass.
Returns:
A NumPy array containing the IL for each training sample, in order.
"""
il_model.eval() # Set IL model to evaluation mode
all_il_losses = []
# Use a DataLoader to process the dataset efficiently
# IMPORTANT: shuffle=False to maintain dataset order
il_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
print("Computing Irreducible Losses (IL) for all training samples...")
with torch.no_grad(): # No gradients needed
        # The loader yields (data, label, index); unpack all three and
        # ignore the index with '_'.
for inputs, labels, _ in tqdm(il_loader, desc="IL Computation"):
inputs, labels = inputs.to(device), labels.to(device)
# Forward pass through the IL model
outputs = il_model(inputs)
# Calculate per-sample loss
loss = criterion_nored(outputs, labels)
all_il_losses.append(loss.cpu())
# Concatenate all batch losses into a single tensor
il_loss_map = torch.cat(all_il_losses).numpy()
# Add a check to make sure the map size matches the dataset
if len(il_loss_map) != len(train_dataset):
print(f"Warning: IL map size ({len(il_loss_map)}) does not match"
f" dataset size ({len(train_dataset)}). Check for errors.")
print(f"Computed IL map with shape: {il_loss_map.shape}")
return il_loss_map
# --- 4. Baseline 3: RHO-LOSS (Phase 2: Training Loop) ---
def train_rho_loss(model, il_loss_map, train_loader, criterion_nored, optimizer, device, selection_ratio=0.1):
"""
Training loop for one epoch using RHO-LOSS selection.
Args:
model: The main target model to train.
il_loss_map: The NumPy array of pre-computed Irreducible Losses.
train_loader: DataLoader. IMPORTANT: Must yield (inputs, labels, indices).
criterion_nored: Loss function (reduction='none').
optimizer: The optimizer.
device: CPU or CUDA.
        selection_ratio: Ratio of samples to select (n_b / n_B),
                         e.g., 0.1 selects the top 10% of each candidate batch.
"""
model.train()
running_selected_loss = 0.0
correct_samples = 0
total_samples = 0
total_selected_samples = 0
# Convert IL map to a tensor on the correct device for fast lookup
il_loss_map_tensor = torch.tensor(il_loss_map, dtype=torch.float32).to(device)
for inputs, labels, indices in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
indices = indices.to(device)
batch_size_nB = inputs.size(0) # This is n_B (large batch size)
# Determine number of samples to select (n_b)
num_to_select_nb = int(batch_size_nB * selection_ratio)
if num_to_select_nb == 0:
num_to_select_nb = 1
# 1. Zero the parameter gradients
optimizer.zero_grad()
# 2. Forward pass (Target Model)
outputs = model(inputs)
# 3. Calculate *current* per-sample loss: L[y|x; D_t]
current_loss_per_sample = criterion_nored(outputs, labels)
# 4. Look up pre-computed Irreducible Loss: L[y|x; D_ho]
# We use the indices to get the correct IL for each sample in the batch
batch_il_loss = il_loss_map_tensor[indices]
# 5. Compute RHO-LOSS score: L[D_t] - L[D_ho]
rho_loss_per_sample = current_loss_per_sample - batch_il_loss
# 6. Select the top-nb samples with the *highest* RHO-LOSS score
# We get the indices *within the batch* of the top samples
_, top_batch_indices = torch.topk(rho_loss_per_sample, num_to_select_nb)
# 7. Get the *current loss* (not RHO-LOSS) for the selected samples
# The gradient is computed on the actual loss of the selected samples
selected_current_loss = current_loss_per_sample[top_batch_indices]
# 8. Calculate the mean loss for the backward pass
mean_selected_loss = selected_current_loss.mean()
# 9. Backward pass and optimize
mean_selected_loss.backward()
optimizer.step()
# --- Statistics ---
running_selected_loss += selected_current_loss.sum().item()
total_selected_samples += num_to_select_nb
_, predicted = torch.max(outputs.data, 1)
total_samples += labels.size(0)
correct_samples += (predicted == labels).sum().item()
epoch_loss = running_selected_loss / total_selected_samples
epoch_acc = correct_samples / total_samples
return epoch_loss, epoch_acc
5. Data Preparation & Noise Injection¶
To run the experiments, we need to load CIFAR-100, MNIST, and the CLOUD dataset.
We will create a custom PyTorch Dataset class that wraps the standard torchvision datasets. This new class will have two key features:
- Noise Injection: During initialization, it checks for a `noise_rate` (e.g., 0.2 for 20%) and a `noise_type` (e.g., 'symmetric'). If specified, it permanently corrupts that percentage of labels for the entire training run.
- Index Tracking: The `__getitem__` method is modified to return `(data, label, index)`. This index is essential for RHO-LOSS (to look up the Irreducible Loss) and for the proposed HASA (to track the historical loss of each sample).
We will also add a transforms hook to add input noise (like Gaussian noise) for the MNIST experiment.
# --- 5. Custom Dataset for Noise & Indexing ---
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CIFAR100, MNIST, ImageFolder
import numpy as np
import torch
from tqdm import tqdm
import os
class NoisyIndexedDataset(Dataset):
"""
Wraps a torchvision dataset (e.g., CIFAR100, MNIST) or a directory-based dataset (CLOUD) to add:
1. Symmetric label noise.
2. An index `idx` to each returned sample. -> (data, label, idx)
"""
def __init__(self, dataset_name, root, train=True, transform=None, download=True,
noise_type='none', noise_rate=0.0, random_seed=42):
self.transform = transform
self.dataset_name = dataset_name
# --- MODIFICATION: Added CLOUD dataset support via ImageFolder ---
if dataset_name == 'CIFAR100':
self.base_dataset = CIFAR100(root=root, train=train, transform=transform, download=download)
self.num_classes = 100
elif dataset_name == 'MNIST':
self.base_dataset = MNIST(root=root, train=train, transform=transform, download=download)
self.num_classes = 10
elif dataset_name == 'CLOUD':
# Assumes 'root' points to the dataset folder.
# If your dataset has specific 'train'/'test' subfolders, modify the path below.
# e.g., target_root = os.path.join(root, 'train') if train else os.path.join(root, 'test')
self.base_dataset = ImageFolder(root=root, transform=transform)
self.num_classes = len(self.base_dataset.classes)
else:
raise ValueError(f"Unknown dataset: {dataset_name}")
# --- MODIFICATION: Abstracted data/target retrieval ---
# ImageFolder uses .targets (list) and .samples (list of paths), not .data (numpy array)
if hasattr(self.base_dataset, 'targets'):
self.targets = np.array(self.base_dataset.targets)
else:
# Fallback: extract targets from samples if .targets is missing
self.targets = np.array([s[1] for s in self.base_dataset.samples])
if hasattr(self.base_dataset, 'data'):
self.data = self.base_dataset.data
else:
# For ImageFolder (CLOUD), we do NOT load pixels to RAM.
# We store file paths in self.data strictly for shape/indexing consistency.
self.data = [s[0] for s in self.base_dataset.samples]
self.noise_type = noise_type
self.noise_rate = noise_rate
self.rng = np.random.RandomState(random_seed)
self.original_targets = self.targets.copy()
self.noisy_targets = self.targets.copy()
self.noise_mask = np.zeros(len(self.targets), dtype=bool)
if train and self.noise_type != 'none' and self.noise_rate > 0:
self._apply_label_noise()
def _apply_label_noise(self):
"""
Modifies self.noisy_targets with the specified noise.
"""
num_samples = len(self.targets)
num_noisy = int(num_samples * self.noise_rate)
# Select indices to corrupt
noisy_indices = self.rng.choice(num_samples, num_noisy, replace=False)
self.noise_mask[noisy_indices] = True
if self.noise_type == 'symmetric':
print(f"Applying {self.noise_rate*100}% symmetric label noise...")
for i in noisy_indices:
original_label = self.targets[i]
# Generate a random new label, different from the original
new_label_candidates = list(range(self.num_classes))
new_label_candidates.remove(original_label)
new_label = self.rng.choice(new_label_candidates)
self.noisy_targets[i] = new_label
# Verify noise
actual_noise = (self.noisy_targets != self.original_targets).mean()
print(f"Noise applied. Original targets modified. Actual noise rate: {actual_noise:.4f}")
        else:
            print(f"Noise type '{self.noise_type}' not implemented; labels remain clean.")
def __len__(self):
return len(self.base_dataset)
def __getitem__(self, idx):
"""
Returns (data, label, index).
Note: The transform is applied here (internal to base_dataset for ImageFolder/CIFAR).
"""
data, _ = self.base_dataset[idx]
label = self.noisy_targets[idx]
return data, label, idx
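A short usage sketch of the wrapper (the 20% noise rate is just for illustration; it assumes CIFAR-100 can be downloaded to ./data):

# --- Usage sketch: NoisyIndexedDataset (illustrative parameters) ---
import torchvision.transforms as transforms

ds = NoisyIndexedDataset('CIFAR100', root='./data', train=True,
                         transform=transforms.ToTensor(),
                         noise_type='symmetric', noise_rate=0.2)
img, label, idx = ds[0]                         # __getitem__ also returns the index
print(img.shape, label, idx)                    # torch.Size([3, 32, 32]), label, 0
print(ds.noise_mask.sum(), "labels corrupted")  # ~10,000 of 50,000 at 20% noise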
# --- 4. Baseline 3: RHO-LOSS (Phase 1: IL Pre-computation) ---
def compute_irreducible_loss(il_model, train_dataset, criterion_nored, device, batch_size=128):
"""
Computes the Irreducible Loss (IL) for every sample in the train_dataset
using the pre-trained il_model.
"""
il_model.eval()
all_il_losses = []
# Use a DataLoader to process the dataset efficiently
# shuffle=False to maintain dataset order
il_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
print("Computing Irreducible Losses (IL) for all training samples...")
with torch.no_grad():
for inputs, labels, _ in tqdm(il_loader, desc="IL Computation"):
inputs, labels = inputs.to(device), labels.to(device)
outputs = il_model(inputs)
loss = criterion_nored(outputs, labels)
all_il_losses.append(loss.cpu())
il_loss_map = torch.cat(all_il_losses).numpy()
if len(il_loss_map) != len(train_dataset):
print(f"Warning: IL map size ({len(il_loss_map)}) does not match"
f" dataset size ({len(train_dataset)}). Check for errors.")
print(f"Computed IL map with shape: {il_loss_map.shape}")
return il_loss_map
# --- 6. DataLoaders and Transforms (Refined: Merge & Manual Split) ---
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset, Subset
import torch
import os
import glob
# --- Hyperparameters ---
DATA_ROOT = './data'
CLOUD_ROOT = './data/task_2_clouds'
MAIN_BATCH_SIZE = 128
VALID_BATCH_SIZE = 256
# --- Custom Transform for Input Noise ---
class AddGaussianNoise(object):
def __init__(self, mean=0., std=1., p=0.5):
self.mean = mean
self.std = std
self.p = p
def __call__(self, tensor):
if torch.rand(1).item() < self.p:
noise = torch.randn(tensor.size()) * self.std + self.mean
return tensor + noise
return tensor
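A quick illustration of what this transform does (with p=1.0 so the noise is always applied):

# --- Illustration: AddGaussianNoise on a zero tensor ---
noisy_tf = AddGaussianNoise(mean=0., std=0.5, p=1.0)
x = torch.zeros(1, 28, 28)
print(noisy_tf(x).std())  # ≈ 0.5, since the output is pure N(0, 0.5^2) noise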
# --- Transforms ---
# 1. CIFAR-100
transform_cifar_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])
transform_cifar_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])
# 2. MNIST
transform_mnist_train = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,)),
AddGaussianNoise(mean=0., std=0.5, p=1.0)
])
transform_mnist_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,)),
])
# 3. CLOUD
# Note: We apply transforms inside the wrapper class below
transform_cloud_train = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(15),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
AddGaussianNoise(mean=0., std=1.0, p=1.0)
])
transform_cloud_test = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# --- Helper Class for Merged Cloud Data ---
class CloudMergedDataset(Dataset):
"""
Reads from both 'clouds_train' and 'clouds_test', merges them,
and allows applying different transforms based on the split.
"""
def __init__(self, root_dir, transform=None):
self.transform = transform
self.samples = []
# 1. Define classes (assume consistent across folders)
# We look into clouds_train to find class names
train_dir = os.path.join(root_dir, 'clouds_train')
self.classes = sorted([d.name for d in os.scandir(train_dir) if d.is_dir()])
self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
# 2. Collect all images from both Train and Test folders
folders_to_scan = [os.path.join(root_dir, 'clouds_train'),
os.path.join(root_dir, 'clouds_test')]
for folder in folders_to_scan:
for cls_name in self.classes:
cls_dir = os.path.join(folder, cls_name)
if not os.path.exists(cls_dir): continue
# Get all images
for img_name in os.listdir(cls_dir):
if img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
self.samples.append((os.path.join(cls_dir, img_name), self.class_to_idx[cls_name]))
def __len__(self):
return len(self.samples)
def __getitem__(self, idx):
path, target = self.samples[idx]
# Load image
from PIL import Image
img = Image.open(path).convert('RGB')
if self.transform:
img = self.transform(img)
# Return (data, label, idx) for HASA/RHO-Loss compatibility
return img, target, idx
# --- Create Datasets ---
print("--- Initializing Datasets ---")
# A. CIFAR-100
cifar_train_dataset = NoisyIndexedDataset('CIFAR100', DATA_ROOT, train=True, transform=transform_cifar_train,
noise_type='symmetric', noise_rate=0.4) # 40% noise
cifar_test_dataset = NoisyIndexedDataset('CIFAR100', DATA_ROOT, train=False, transform=transform_cifar_test, noise_type='none')
# Holdout
c_clean = datasets.CIFAR100(root=DATA_ROOT, train=True, transform=transform_cifar_train)
cifar_holdout_dataset = torch.utils.data.Subset(c_clean, list(range(len(c_clean)-10000, len(c_clean))))
# B. MNIST
mnist_train_dataset = NoisyIndexedDataset('MNIST', DATA_ROOT, train=True, transform=transform_mnist_train, noise_type='none')
mnist_test_dataset = NoisyIndexedDataset('MNIST', DATA_ROOT, train=False, transform=transform_mnist_test, noise_type='none')
# Holdout
m_clean = datasets.MNIST(root=DATA_ROOT, train=True, transform=transform_mnist_train)
mnist_holdout_dataset = torch.utils.data.Subset(m_clean, list(range(len(m_clean)-5000, len(m_clean))))
# C. CLOUD (Merged & Split)
print(f"Merging Cloud data from {CLOUD_ROOT}...")
# 1. Load ALL data (raw, no transform yet)
full_cloud_data = CloudMergedDataset(CLOUD_ROOT, transform=None)
total_cloud = len(full_cloud_data)
# 2. Calculate Split Sizes (80% Train, 10% Holdout, 10% Test)
train_size = int(0.8 * total_cloud)
holdout_size = int(0.1 * total_cloud)
test_size = total_cloud - train_size - holdout_size
# 3. Generate Indices
gen = torch.Generator().manual_seed(42)
# We shuffle indices to mix the original train/test folders thoroughly
indices = torch.randperm(total_cloud, generator=gen).tolist()
train_indices = indices[:train_size]
holdout_indices = indices[train_size : train_size + holdout_size]
test_indices = indices[train_size + holdout_size:]
# 4. Create Wrappers with Correct Transforms
# We use Subset to pick indices, then we wrap in a simple class to apply transform
class ApplyTransformSubset(Dataset):
def __init__(self, underlying_dataset, indices, transform):
self.dataset = underlying_dataset
self.indices = indices
self.transform = transform
def __len__(self):
return len(self.indices)
def __getitem__(self, idx):
# Map local subset index to global dataset index
global_idx = self.indices[idx]
# Get raw data (path, target)
path, target = self.dataset.samples[global_idx]
# Load and Transform
from PIL import Image
img = Image.open(path).convert('RGB')
img = self.transform(img)
# Return global_idx to track sample history uniquely
return img, target, global_idx
cloud_train_ds = ApplyTransformSubset(full_cloud_data, train_indices, transform=transform_cloud_train)
cloud_holdout_ds = ApplyTransformSubset(full_cloud_data, holdout_indices, transform=transform_cloud_train)
cloud_test_ds = ApplyTransformSubset(full_cloud_data, test_indices, transform=transform_cloud_test)
# --- Create DataLoaders ---
cifar_train_loader = DataLoader(cifar_train_dataset, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=4)
cifar_holdout_loader = DataLoader(cifar_holdout_dataset, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=4)
cifar_test_loader = DataLoader(cifar_test_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=4)
mnist_train_loader = DataLoader(mnist_train_dataset, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=4)
mnist_holdout_loader = DataLoader(mnist_holdout_dataset, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=4)
mnist_test_loader = DataLoader(mnist_test_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=4)
cloud_train_loader = DataLoader(cloud_train_ds, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=4)
cloud_holdout_loader = DataLoader(cloud_holdout_ds, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=4)
cloud_test_loader = DataLoader(cloud_test_ds, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=4)
print(f"Loaders Ready:")
print(f" CIFAR Train: {len(cifar_train_loader)} batches")
print(f" MNIST Train: {len(mnist_train_loader)} batches")
print(f" CLOUD Train: {len(cloud_train_loader)} batches (from {len(cloud_train_ds)} samples)")
print(f" CLOUD Test: {len(cloud_test_loader)} batches (from {len(cloud_test_ds)} samples)")
--- Initializing Datasets ---
Applying 40.0% symmetric label noise...
Noise applied. Original targets modified. Actual noise rate: 0.4000
Merging Cloud data from ./data/task_2_clouds...
Loaders Ready:
 CIFAR Train: 391 batches
 MNIST Train: 469 batches
 CLOUD Train: 6 batches (from 768 samples)
 CLOUD Test: 1 batches (from 96 samples)
6. Model Architecture¶
We define the neural network architectures. As the thesis focus is on the sampling algorithm and not SOTA architectures, simple models are sufficient.
- MNIST Model: A simple Convolutional Neural Network (CNN).
- CIFAR-100 Model: A simple VGG-style CNN. This is used for both the main target model and the IL model (for RHO-LOSS).
- CLOUD Model: A ResNet-18 operating on 224×224 inputs (defined below as `Cloud_ResNet18`).
# --- 7. Model Definitions (Updated) ---
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
# --- Model for MNIST (28x28, 1 Channel) ---
class MNIST_CNN(nn.Module):
def __init__(self):
super(MNIST_CNN, self).__init__()
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
self.fc1 = nn.Linear(64 * 7 * 7, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 64 * 7 * 7)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
# --- Model for CIFAR-100 (32x32, 3 Channels) ---
# VGG-Small (Optimized for low-res inputs)
class VGG_Small(nn.Module):
def __init__(self, num_classes=100):
super(VGG_Small, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.classifier = nn.Sequential(
nn.Linear(256 * 4 * 4, 512),
nn.ReLU(inplace=True),
nn.Dropout(0.5),
nn.Linear(512, num_classes),
)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
# --- Model for CLOUD (224x224, 3 Channels) ---
# ResNet-18 (Standard for medium/high-res inputs)
class Cloud_ResNet18(nn.Module):
def __init__(self, num_classes):
super(Cloud_ResNet18, self).__init__()
# We load a standard ResNet18 structure
# weights=None implies training from scratch (standard for optimization benchmarks)
self.model = models.resnet18(weights=None)
# Replace the final Fully Connected layer to match Cloud classes
num_ftrs = self.model.fc.in_features
self.model.fc = nn.Linear(num_ftrs, num_classes)
def forward(self, x):
return self.model(x)
print("Model architectures defined: MNIST_CNN, VGG_Small, Cloud_ResNet18")
Model architectures defined: MNIST_CNN, VGG_Small, Cloud_ResNet18
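As a lightweight sanity check (a sketch; the class count of 7 for the cloud model is a placeholder), dummy tensors can be pushed through each architecture to confirm the output shapes:

# --- Sanity check: output shapes with dummy inputs (illustrative) ---
print(MNIST_CNN()(torch.randn(2, 1, 28, 28)).shape)                       # torch.Size([2, 10])
print(VGG_Small(num_classes=100)(torch.randn(2, 3, 32, 32)).shape)        # torch.Size([2, 100])
print(Cloud_ResNet18(num_classes=7)(torch.randn(2, 3, 224, 224)).shape)   # torch.Size([2, 7])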
7. Main Training & Evaluation Loop¶
Now we define the helper functions for evaluation and the main training runner.
- `validate` function: A standard loop to compute loss and accuracy on the (clean) test/validation set.
- `run_training_experiment` function: This is the main wrapper. It handles:
  - Initializing the model, optimizer, and losses.
  - Running the training for `num_epochs`.
  - A `debug_epochs` flag to run for just a few epochs (e.g., 3) to ensure the pipeline works without waiting.
  - Running the correct algorithm branch (`'uniform_sgd'`, `'mkl_sgd'`, or `'rho_loss'`) based on the string argument `algorithm`.
  - Calling the `validate` function after each epoch.
# --- 8. Validation Function ---
def validate(model, test_loader, criterion, device):
"""Standard validation loop."""
model.eval() # Set the model to evaluation mode
running_loss = 0.0
correct_samples = 0
total_samples = 0
with torch.no_grad():
# Note: We don't need the index '_' here, but the loader provides it
for inputs, labels, _ in test_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels) # Use mean reduction loss
running_loss += loss.item() * inputs.size(0)
_, predicted = torch.max(outputs.data, 1)
total_samples += labels.size(0)
correct_samples += (predicted == labels).sum().item()
epoch_loss = running_loss / total_samples
epoch_acc = correct_samples / total_samples
return epoch_loss, epoch_acc
# --- 8b. Helper Function: Train IL Model (Updated) ---
import torch.optim as optim
def train_il_model(il_model, holdout_loader, test_loader, device, num_epochs=50):
"""
Simple helper to train the Irreducible Loss (IL) model.
Handles holdout loaders with either (data, label) or (data, label, idx).
"""
il_model.to(device)
il_criterion = nn.CrossEntropyLoss().to(device)
il_optimizer = optim.Adam(il_model.parameters(), lr=0.001)
print("--- Training IL Model on Holdout Set ---")
best_val_loss = float('inf')
# Internal validation function
def validate_il(model, test_loader, criterion, device):
model.eval()
running_loss = 0.0
correct, total = 0, 0
with torch.no_grad():
# Handle variable unpacking for test_loader too (just in case)
for batch in test_loader:
inputs = batch[0].to(device)
labels = batch[1].to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
running_loss += loss.item() * inputs.size(0)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
return running_loss / total, correct / total
for epoch in range(num_epochs):
il_model.train()
        # Fetch the whole batch tuple first, then extract inputs/labels.
        # This works for loaders yielding (data, label) and (data, label, idx).
for batch in holdout_loader:
inputs = batch[0].to(device)
labels = batch[1].to(device)
il_optimizer.zero_grad()
outputs = il_model(inputs)
loss = il_criterion(outputs, labels)
loss.backward()
il_optimizer.step()
# Validate on the *real* test set
val_loss, val_acc = validate_il(il_model, test_loader, il_criterion, device)
# Print every 10 epochs or first/last to reduce clutter
if (epoch + 1) % 10 == 0 or epoch == 0:
print(f"IL Model Epoch {epoch+1}/{num_epochs} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%")
if val_loss < best_val_loss:
best_val_loss = val_loss
# Optional: Save best model state here if needed
# torch.save(il_model.state_dict(), 'best_il_model.pth')
print(f"--- IL Model Training Complete. Best Val Loss: {best_val_loss:.4f} ---")
return il_model
# --- 9. Main Training Runner (Updated with RHO-Loss Logic) ---
def train_rho_loss(model, il_map, train_loader, criterion_nored, optimizer, device, selection_ratio):
"""
RHO-LOSS Training Step (Defined here for completeness).
Selects top samples where (L_current - L_irreducible) is highest.
"""
model.train()
running_loss = 0.0
correct, total, selected_total = 0, 0, 0
for inputs, labels, indices in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
# 1. Compute Current Loss (without reduction)
optimizer.zero_grad()
outputs = model(inputs)
loss_current = criterion_nored(outputs, labels) # shape: [B]
# 2. Retrieve Irreducible Loss for these indices
# indices is a tensor of shape [B], use it to query the map
loss_irreducible = torch.tensor(il_map[indices.cpu()], device=device)
# 3. Compute Reducible Loss (L_red = L_cur - L_il)
loss_reducible = loss_current - loss_irreducible
# 4. Selection (Top-k percent)
batch_size = inputs.size(0)
num_keep = max(1, int(batch_size * selection_ratio))
# We want samples with HIGHEST reducible loss (most learnable)
# sort descending
top_scores, top_idx = torch.topk(loss_reducible, k=num_keep, largest=True)
# 5. Backward pass on SELECTED samples only
# We need to re-compute loss or index into the computed graph
# Indexing into loss_current maintains the graph
selected_loss = loss_current[top_idx]
final_loss = selected_loss.mean()
final_loss.backward()
optimizer.step()
running_loss += selected_loss.sum().item()
selected_total += num_keep
# Acc stats (on full batch for fairness)
_, pred = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (pred == labels).sum().item()
return running_loss / selected_total, correct / total
def run_training_experiment(
algorithm,
model,
train_loader,
test_loader,
criterion,
criterion_nored,
optimizer,
device,
num_epochs=100,
debug_epochs=None,
checkpoint_path=None,
mkl_k_ratio=2.0,
rho_il_map=None,
rho_selection_ratio=0.1
):
"""
Main training loop with checkpointing.
"""
start_epoch = 0
best_val_acc = 0.0
train_losses, train_accs = [], []
val_losses, val_accs = [], []
# --- 1. Load Checkpoint ---
if checkpoint_path and os.path.exists(checkpoint_path):
print(f"Loading checkpoint from: {checkpoint_path}")
try:
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1
best_val_acc = checkpoint.get('best_val_acc', 0.0)
train_losses = checkpoint.get('train_losses', [])
train_accs = checkpoint.get('train_accs', [])
val_losses = checkpoint.get('val_losses', [])
val_accs = checkpoint.get('val_accs', [])
print(f"Resuming training from Epoch {start_epoch}")
except Exception as e:
print(f"Error loading checkpoint, starting from scratch. Error: {e}")
start_epoch = 0
else:
print("No checkpoint found. Starting from scratch.")
# Debug Mode Override
epochs_to_run = num_epochs
if debug_epochs:
print(f"--- DEBUG MODE: Running {debug_epochs} epochs ---")
epochs_to_run = debug_epochs
start_epoch = 0
train_losses, train_accs, val_losses, val_accs = [], [], [], []
print(f"--- Starting Training: {algorithm} ---")
for epoch in range(start_epoch, epochs_to_run):
# --- Algorithm Select ---
if algorithm == 'uniform_sgd':
# Helper for Uniform SGD
model.train()
r_loss, correct, total = 0.0, 0, 0
for batch in train_loader:
inputs, labels = batch[0].to(device), batch[1].to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
r_loss += loss.item() * inputs.size(0)
_, pred = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (pred == labels).sum().item()
train_loss, train_acc = r_loss/total, correct/total
elif algorithm == 'mkl_sgd':
# Helper for MKL-SGD
model.train()
r_loss, correct, total, sel_total = 0.0, 0, 0, 0
for batch in train_loader:
inputs, labels = batch[0].to(device), batch[1].to(device)
batch_size = inputs.size(0)
num_to_select = max(1, int(batch_size / mkl_k_ratio))
optimizer.zero_grad()
outputs = model(inputs)
per_sample_loss = criterion_nored(outputs, labels)
# Sort small->large (Easy samples first)
sorted_loss, _ = torch.sort(per_sample_loss)
selected_loss = sorted_loss[:num_to_select]
mean_loss = selected_loss.mean()
mean_loss.backward()
optimizer.step()
r_loss += selected_loss.sum().item()
sel_total += num_to_select
_, pred = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (pred == labels).sum().item()
train_loss, train_acc = r_loss/sel_total, correct/total
elif algorithm == 'rho_loss':
if rho_il_map is None:
raise ValueError("rho_il_map must be provided for RHO-LOSS")
train_loss, train_acc = train_rho_loss(
model, rho_il_map, train_loader, criterion_nored, optimizer, device, rho_selection_ratio
)
else:
raise ValueError(f"Unknown algorithm: {algorithm}")
# --- Validation ---
val_loss, val_acc = validate(model, test_loader, criterion, device)
# History
train_losses.append(train_loss)
train_accs.append(train_acc)
val_losses.append(val_loss)
val_accs.append(val_acc)
print(f"Epoch {epoch+1}/{epochs_to_run} | Tr Loss: {train_loss:.4f} Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")
# --- Checkpoint ---
is_best = val_acc > best_val_acc
if is_best: best_val_acc = val_acc
if checkpoint_path and not debug_epochs:
state = {
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'best_val_acc': best_val_acc,
'train_losses': train_losses, 'train_accs': train_accs,
'val_losses': val_losses, 'val_accs': val_accs
}
torch.save(state, checkpoint_path)
if is_best:
torch.save(state, checkpoint_path.replace('.pth', '_best.pth'))
print(f" [New Best] Saved to {checkpoint_path.replace('.pth', '_best.pth')}")
print(f"--- Finished: {algorithm} ---")
return {'train_loss': train_losses, 'train_acc': train_accs, 'val_loss': val_losses, 'val_acc': val_accs}
# --- 10a. Combined Data & Loss Setup (REVISED: std=1.0, 60/40 Split) ---
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset, random_split
import torch
import os
print("--- Defining REVISED HARD MODE datasets (std=1.0, 60/40 Split) ---")
# --- Global Settings ---
MAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 256
DATA_ROOT = './data'
CLOUD_ROOT = './data/task_2_clouds'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# --- Helper: Gaussian Noise ---
class AddGaussianNoise(object):
def __init__(self, mean=0., std=1., p=0.5):
self.mean = mean
self.std = std
self.p = p
def __call__(self, tensor):
if torch.rand(1).item() < self.p:
noise = torch.randn(tensor.size()) * self.std + self.mean
return tensor + noise
return tensor
# --- Helper: Merge Dataset (Needed to remix the 50/50 folder split) ---
class CloudMergedDataset(Dataset):
def __init__(self, root_dir):
self.samples = []
# Scan both train and test folders
train_dir = os.path.join(root_dir, 'clouds_train')
test_dir = os.path.join(root_dir, 'clouds_test')
# Get classes from train dir
self.classes = sorted([d.name for d in os.scandir(train_dir) if d.is_dir()])
self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
# Collect images
for folder in [train_dir, test_dir]:
for cls_name in self.classes:
cls_dir = os.path.join(folder, cls_name)
if not os.path.exists(cls_dir): continue
for img_name in os.listdir(cls_dir):
if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
self.samples.append((os.path.join(cls_dir, img_name), self.class_to_idx[cls_name]))
def __len__(self): return len(self.samples)
def __getitem__(self, idx):
# Returns raw path and target (transforms applied later)
return self.samples[idx]
# --- Helper: Apply Transforms Wrapper ---
class ApplyTransformSubset(Dataset):
def __init__(self, dataset, indices, transform):
self.dataset = dataset
self.indices = indices
self.transform = transform
def __len__(self): return len(self.indices)
def __getitem__(self, idx):
global_idx = self.indices[idx]
path, target = self.dataset[global_idx]
from PIL import Image
img = Image.open(path).convert('RGB')
img = self.transform(img)
return img, target, global_idx
# ==========================================
# 1. TASK 3: CLOUD SETUP (The Update)
# ==========================================
# TRAIN TRANSFORM: Gaussian Noise with std=1.0
transform_cloud_train_hard = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(15),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
AddGaussianNoise(mean=0., std=1.0, p=1.0) # <--- UPDATED to 1.0
])
# TEST TRANSFORM: Clean
transform_cloud_test_clean = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# MERGE & SPLIT (60% Train Side / 40% Test Side)
full_cloud_data = CloudMergedDataset(CLOUD_ROOT)
total_cloud = len(full_cloud_data)
# We need a Clean Holdout for RHO.
# Plan: 50% Noisy Train + 10% Clean Holdout = 60% Training Data.
# 40% Clean Test.
train_size = int(0.50 * total_cloud)
holdout_size = int(0.10 * total_cloud)
test_size = total_cloud - train_size - holdout_size
gen = torch.Generator().manual_seed(42)
indices = torch.randperm(total_cloud, generator=gen).tolist()
train_indices = indices[:train_size]
holdout_indices = indices[train_size : train_size + holdout_size]
test_indices = indices[train_size + holdout_size:]
# Create Datasets
cloud_train_ds = ApplyTransformSubset(full_cloud_data, train_indices, transform=transform_cloud_train_hard)
cloud_holdout_ds = ApplyTransformSubset(full_cloud_data, holdout_indices, transform=transform_cloud_test_clean) # Clean Holdout
cloud_test_ds = ApplyTransformSubset(full_cloud_data, test_indices, transform=transform_cloud_test_clean)
# Loaders
cloud_train_loader = DataLoader(cloud_train_ds, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=0)
cloud_holdout_loader = DataLoader(cloud_holdout_ds, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=0)
cloud_test_loader = DataLoader(cloud_test_ds, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)
criterion_cloud = torch.nn.CrossEntropyLoss().to(device)
criterion_nored_cloud = torch.nn.CrossEntropyLoss(reduction='none').to(device)
print(f"--- CLOUD CONFIGURATION ---")
print(f"Noise Level: Gaussian std=1.0")
print(f"Total Samples: {total_cloud}")
print(f"Split: Train {len(cloud_train_ds)} ({len(cloud_train_ds)/total_cloud:.1%}) | "
f"Holdout {len(cloud_holdout_ds)} ({len(cloud_holdout_ds)/total_cloud:.1%}) | "
f"Test {len(cloud_test_ds)} ({len(cloud_test_ds)/total_cloud:.1%})")
# ==========================================
# 2. Re-Confirming CIFAR/MNIST (Standard Hard Mode)
# ==========================================
# (These remain the same as before, re-defining briefly to keep variables alive)
# CIFAR
transform_cifar_train = transforms.Compose([
transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])
transform_cifar_test = transforms.Compose([
transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])
cifar_train_dataset = NoisyIndexedDataset('CIFAR100', DATA_ROOT, train=True, transform=transform_cifar_train,
noise_type='symmetric', noise_rate=0.4)
cifar_test_dataset = NoisyIndexedDataset('CIFAR100', DATA_ROOT, train=False, transform=transform_cifar_test, noise_type='none')
# Holdout
c_clean = datasets.CIFAR100(root=DATA_ROOT, train=True, transform=transform_cifar_train)
c_hold_idx = list(range(len(c_clean)-10000, len(c_clean)))
cifar_holdout_dataset = torch.utils.data.Subset(c_clean, c_hold_idx)
cifar_train_loader = DataLoader(cifar_train_dataset, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=0)
cifar_test_loader = DataLoader(cifar_test_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)
cifar_holdout_loader_simple = DataLoader(cifar_holdout_dataset, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=0)
criterion_cifar = torch.nn.CrossEntropyLoss().to(device)
criterion_nored_cifar = torch.nn.CrossEntropyLoss(reduction='none').to(device)
# MNIST: Gaussian input noise with std=1.5 (Hard Mode; the Cloud task uses std=1.0)
transform_mnist_train_hard = transforms.Compose([
transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)),
AddGaussianNoise(mean=0., std=1.5, p=1.0)
])
transform_mnist_test_clean = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
mnist_train_dataset = NoisyIndexedDataset('MNIST', DATA_ROOT, train=True, transform=transform_mnist_train_hard, noise_type='none')
mnist_test_dataset = NoisyIndexedDataset('MNIST', DATA_ROOT, train=False, transform=transform_mnist_test_clean, noise_type='none')
# Holdout
m_clean = datasets.MNIST(root=DATA_ROOT, train=True, transform=transform_mnist_test_clean)
m_hold_idx = list(range(len(m_clean)-5000, len(m_clean)))
mnist_holdout_dataset = torch.utils.data.Subset(m_clean, m_hold_idx)
mnist_train_loader = DataLoader(mnist_train_dataset, batch_size=MAIN_BATCH_SIZE, shuffle=True, num_workers=0)
mnist_test_loader = DataLoader(mnist_test_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)
mnist_holdout_loader_simple = DataLoader(mnist_holdout_dataset, batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=0)
criterion_mnist = torch.nn.CrossEntropyLoss().to(device)
criterion_nored_mnist = torch.nn.CrossEntropyLoss(reduction='none').to(device)
print("--- Data Setup Complete ---")
--- Defining REVISED HARD MODE datasets (std=1.0, 60/40 Split) ---
--- CLOUD CONFIGURATION ---
Noise Level: Gaussian std=1.0
Total Samples: 960
Split: Train 480 (50.0%) | Holdout 96 (10.0%) | Test 384 (40.0%)
Applying 40.0% symmetric label noise...
Noise applied. Original targets modified. Actual noise rate: 0.4000
--- Data Setup Complete ---
# --- 10b. RHO-LOSS Pre-computation (HARD MODE + CLOUD) ---
# [ UPDATED: Saves to 'ThesisCheckpoints_v2' ]
import numpy as np
import os
import torch
# Check if we are in Google Colab for the base path
try:
import google.colab
BASE_PATH = '/content/drive/MyDrive'
except ImportError:
BASE_PATH = '.'
# --- NEW DIRECTORY for Hard Mode ---
checkpoint_dir = os.path.join(BASE_PATH, 'ThesisCheckpoints_v2')
if not os.path.exists(checkpoint_dir):
print(f"Creating new checkpoint directory: {checkpoint_dir}")
os.makedirs(checkpoint_dir)
else:
print(f"Using existing checkpoint directory: {checkpoint_dir}")
# ==========================================
# 1. CIFAR-100 Pre-computation
# ==========================================
print("\n" + "="*50)
print("Processing: CIFAR-100 (Hard Mode)")
print("="*50)
il_model_cifar_path = os.path.join(checkpoint_dir, "il_model_cifar.pth")
il_map_cifar_path = os.path.join(checkpoint_dir, "il_map_cifar.npy")
il_model_cifar = VGG_Small(num_classes=100).to(device)
if os.path.exists(il_model_cifar_path):
print(f"Loading existing IL MODEL from: {il_model_cifar_path}")
il_model_cifar.load_state_dict(torch.load(il_model_cifar_path, map_location=device))
else:
print(f"No IL model found. Training new model...")
# Train for 100 epochs on clean holdout
il_model_cifar = train_il_model(
il_model_cifar, cifar_holdout_loader_simple, cifar_test_loader, device, num_epochs=100
)
print(f"Training complete. Saving IL model...")
torch.save(il_model_cifar.state_dict(), il_model_cifar_path)
if os.path.exists(il_map_cifar_path):
print(f"Loading existing IL MAP from: {il_map_cifar_path}")
il_map_cifar = np.load(il_map_cifar_path)
else:
print(f"No IL map found. Computing new map...")
il_map_cifar = compute_irreducible_loss(
il_model_cifar, cifar_train_dataset, criterion_nored_cifar, device
)
print(f"Computation complete. Saving IL map...")
np.save(il_map_cifar_path, il_map_cifar)
# ==========================================
# 2. MNIST Pre-computation
# ==========================================
print("\n" + "="*50)
print("Processing: MNIST (Hard Mode)")
print("="*50)
il_model_mnist_path = os.path.join(checkpoint_dir, "il_model_mnist.pth")
il_map_mnist_path = os.path.join(checkpoint_dir, "il_map_mnist.npy")
il_model_mnist = MNIST_CNN().to(device)
if os.path.exists(il_model_mnist_path):
print(f"Loading existing IL MODEL from: {il_model_mnist_path}")
il_model_mnist.load_state_dict(torch.load(il_model_mnist_path, map_location=device))
else:
print(f"No IL model found. Training new model...")
# MNIST converges fast, 30 epochs is usually enough for IL
il_model_mnist = train_il_model(
il_model_mnist, mnist_holdout_loader_simple, mnist_test_loader, device, num_epochs=30
)
print(f"Training complete. Saving IL model...")
torch.save(il_model_mnist.state_dict(), il_model_mnist_path)
if os.path.exists(il_map_mnist_path):
print(f"Loading existing IL MAP from: {il_map_mnist_path}")
il_map_mnist = np.load(il_map_mnist_path)
else:
print(f"No IL map found. Computing new map...")
il_map_mnist = compute_irreducible_loss(
il_model_mnist, mnist_train_dataset, criterion_nored_mnist, device
)
print(f"Computation complete. Saving IL map...")
np.save(il_map_mnist_path, il_map_mnist)
# ==========================================
# 3. CLOUD Pre-computation (NEW)
# ==========================================
print("\n" + "="*50)
print("Processing: CLOUD (Hard Mode)")
print("="*50)
il_model_cloud_path = os.path.join(checkpoint_dir, "il_model_cloud.pth")
il_map_cloud_path = os.path.join(checkpoint_dir, "il_map_cloud.npy")
# Dynamically get number of classes from the merged dataset
num_cloud_classes = len(full_cloud_data.classes)
il_model_cloud = Cloud_ResNet18(num_classes=num_cloud_classes).to(device)
if os.path.exists(il_model_cloud_path):
print(f"Loading existing IL MODEL from: {il_model_cloud_path}")
il_model_cloud.load_state_dict(torch.load(il_model_cloud_path, map_location=device))
else:
print(f"No IL model found. Training new model...")
    # The cloud dataset is small, but ResNet converges well; 50 epochs suffice.
    # We use 'cloud_holdout_loader' (which is CLEAN) for training the IL model.
il_model_cloud = train_il_model(
il_model_cloud, cloud_holdout_loader, cloud_test_loader, device, num_epochs=50
)
print(f"Training complete. Saving IL model...")
torch.save(il_model_cloud.state_dict(), il_model_cloud_path)
if os.path.exists(il_map_cloud_path):
print(f"Loading existing IL MAP from: {il_map_cloud_path}")
il_map_cloud = np.load(il_map_cloud_path)
else:
print(f"No IL map found. Computing new map...")
# We compute the map on the NOISY training set ('cloud_train_ds')
# Note: 'cloud_train_ds' is an ApplyTransformSubset, but our compute_irreducible_loss
# wraps it in a DataLoader, so it works perfectly.
il_map_cloud = compute_irreducible_loss(
il_model_cloud, cloud_train_ds, criterion_nored_cloud, device
)
print(f"Computation complete. Saving IL map...")
np.save(il_map_cloud_path, il_map_cloud)
print("\n--- All Pre-computation Complete ---")
Using existing checkpoint directory: ./ThesisCheckpoints_v2
==================================================
Processing: CIFAR-100 (Hard Mode)
==================================================
Loading existing IL MODEL from: ./ThesisCheckpoints_v2/il_model_cifar.pth
Loading existing IL MAP from: ./ThesisCheckpoints_v2/il_map_cifar.npy
==================================================
Processing: MNIST (Hard Mode)
==================================================
Loading existing IL MODEL from: ./ThesisCheckpoints_v2/il_model_mnist.pth
Loading existing IL MAP from: ./ThesisCheckpoints_v2/il_map_mnist.npy
==================================================
Processing: CLOUD (Hard Mode)
==================================================
Loading existing IL MODEL from: ./ThesisCheckpoints_v2/il_model_cloud.pth
Loading existing IL MAP from: ./ThesisCheckpoints_v2/il_map_cloud.npy

--- All Pre-computation Complete ---
# --- 10.c REPAIR KIT: Updated Model & Fixed Pre-computation ---
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from tqdm import tqdm
import os

# --- 1. Update Model: Use Pretrained Weights (Solves Overfitting) ---
class Cloud_ResNet18(nn.Module):
    def __init__(self, num_classes):
        super(Cloud_ResNet18, self).__init__()
        # Use ImageNet weights (transfer learning) to handle the small dataset size.
        # This drastically improves generalization compared to training from scratch.
        self.model = models.resnet18(weights='IMAGENET1K_V1')
        # Replace the final fully connected layer
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        return self.model(x)

print("Updated Cloud_ResNet18 to use Pretrained Weights.")

# --- 2. Fix Pre-computation: Global Indexing (Solves IndexError) ---
# We re-run the Cloud pre-computation logic with a "Global Map" approach.

# Settings
checkpoint_dir = './ThesisCheckpoints_v2'  # Ensure this matches your path
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
# Match the device cascade from the setup cell (CUDA, then Apple MPS, then CPU)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("\n" + "="*50)
print("Re-Processing: CLOUD (Hard Mode) with Global Index Fix")
print("="*50)

# Paths
il_model_cloud_path = os.path.join(checkpoint_dir, "il_model_cloud_fixed.pth")
il_map_cloud_path = os.path.join(checkpoint_dir, "il_map_cloud_fixed.npy")

# Helper to get the class count
try:
    num_cloud_classes = len(full_cloud_data.classes)
    total_samples = len(full_cloud_data)  # Global size (approx. 1000+)
except NameError:
    # Reload if the notebook context was lost
    full_cloud_data = CloudMergedDataset('./data/task_2_clouds', transform=None)
    num_cloud_classes = len(full_cloud_data.classes)
    total_samples = len(full_cloud_data)

# A. Train IL Model (Clean Holdout)
il_model_cloud = Cloud_ResNet18(num_classes=num_cloud_classes).to(device)
if os.path.exists(il_model_cloud_path):
    print(f"Loading existing FIXED IL MODEL from: {il_model_cloud_path}")
    il_model_cloud.load_state_dict(torch.load(il_model_cloud_path, map_location=device))
else:
    print("Training new IL Model (Pretrained backbone)...")
    # We train on the holdout set (clean). Since the backbone is pretrained, it converges fast.
    il_model_cloud = train_il_model(
        il_model_cloud, cloud_holdout_loader, cloud_test_loader, device, num_epochs=20
    )
    torch.save(il_model_cloud.state_dict(), il_model_cloud_path)

# B. Compute Global IL Map
# This fixes the IndexError: we create a map of size [TOTAL_SAMPLES], not just [TRAIN_SAMPLES].
if os.path.exists(il_map_cloud_path):
    print(f"Loading existing FIXED IL MAP from: {il_map_cloud_path}")
    il_map_cloud = np.load(il_map_cloud_path)
else:
    print(f"Computing new GLOBAL IL Map (Size: {total_samples})...")
    # 1. Initialize the map with -1 so accidental reads of untracked indices are easy to spot
    il_map_global = np.full(total_samples, -1.0)

    # 2. Iterate through the TRAIN loader (Hard Mode noise).
    # We must use a loader that returns (data, label, GLOBAL_IDX);
    # cloud_train_loader from snippet 10a does exactly this.
    il_model_cloud.eval()
    criterion_nored = nn.CrossEntropyLoss(reduction='none').to(device)

    print("Computing losses...")
    with torch.no_grad():
        for inputs, labels, indices in tqdm(cloud_train_loader, desc="IL Mapping"):
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward pass
            outputs = il_model_cloud(inputs)
            losses = criterion_nored(outputs, labels)  # shape [batch]
            # Place losses into the global map at the correct indices;
            # 'indices' is a tensor of global IDs (e.g., [784, 5, 10, ...])
            for i, global_idx in enumerate(indices):
                il_map_global[global_idx.item()] = losses[i].item()

    # Sanity check: only indices present in cloud_train_loader are filled.
    # Holdout/test indices remain -1.0, which is fine because we never query the RHO loss for them.
    valid_entries = (il_map_global != -1.0).sum()
    print(f"Map computed. Valid Training Entries: {valid_entries}/{total_samples}")
    np.save(il_map_cloud_path, il_map_global)
    il_map_cloud = il_map_global

print("--- Cloud Repair Complete ---")
Updated Cloud_ResNet18 to use Pretrained Weights.
==================================================
Re-Processing: CLOUD (Hard Mode) with Global Index Fix
==================================================
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/selim/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:15<00:00, 3.10MB/s]
Training new IL Model (Pretrained backbone)...
--- Training IL Model on Holdout Set ---
IL Model Epoch 1/20 | Val Loss: 1.1831, Val Acc: 62.50%
IL Model Epoch 10/20 | Val Loss: 1.5690, Val Acc: 68.75%
IL Model Epoch 20/20 | Val Loss: 0.7838, Val Acc: 86.46%
--- IL Model Training Complete. Best Val Loss: 0.7838 ---
Computing new GLOBAL IL Map (Size: 960)...
Computing losses...
IL Mapping: 100%|██████████| 24/24 [00:15<00:00, 1.58it/s]
Map computed. Valid Training Entries: 768/960
--- Cloud Repair Complete ---
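The global-index fix above assumes a train loader that yields (data, label, global_idx) triples; snippet 10a's cloud_train_loader is stated to do exactly that. For reference, a wrapper with this behavior can be as small as the sketch below (the class name IndexedDataset is illustrative, not the notebook's). The per-sample Python loop above could also be replaced by the single vectorized scatter shown at the end.
# --- (Illustrative sketch) Dataset wrapper that also returns the global sample index ---
import torch
from torch.utils.data import Dataset

class IndexedDataset(Dataset):
    def __init__(self, base_dataset):
        self.base = base_dataset

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        x, y = self.base[idx]
        return x, y, idx  # idx = global position in the base dataset

# Vectorized alternative to the inner loop above ('indices' arrives as a CPU LongTensor):
# il_map_global[indices.numpy()] = losses.cpu().numpy()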
# --- 10_b and c combined (run this instead of b and c) ---
# Final RHO-LOSS Pre-computation (MASTER VERSION)
# Combines: CIFAR/MNIST (standard) + CLOUD (pretrained & global index fix).
# Auto-cleans old Cloud checkpoints.
import numpy as np
import os
import torch
import torch.nn as nn
import torchvision.models as models
from tqdm import tqdm

# --- 1. Setup Paths & Clean Up ---
try:
    import google.colab
    BASE_PATH = '/content/drive/MyDrive'
except ImportError:
    BASE_PATH = '.'

checkpoint_dir = os.path.join(BASE_PATH, 'ThesisCheckpoints_v2')
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
print(f"Checkpoint Directory: {checkpoint_dir}")

# !!! AUTO-DELETE OLD CLOUD FILES !!!
# We delete these to force re-computation with the new 60/40 split and the pretrained model
files_to_clean = [
    "il_model_cloud.pth", "il_map_cloud.npy",
    "il_model_cloud_fixed.pth", "il_map_cloud_fixed.npy"
]
print("--- Cleaning old Cloud checkpoints... ---")
for f in files_to_clean:
    path = os.path.join(checkpoint_dir, f)
    if os.path.exists(path):
        os.remove(path)
        print(f"Deleted outdated file: {f}")
print("--- Cleanup Complete. Starting Pre-computation. ---")

# Match the device cascade from the setup cell (CUDA, then Apple MPS, then CPU)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# --- 2. Model Definitions ---
# CIFAR VGG
class VGG_Small(nn.Module):
    def __init__(self, num_classes=100):
        super(VGG_Small, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512), nn.ReLU(inplace=True), nn.Dropout(0.5), nn.Linear(512, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

# MNIST CNN
class MNIST_CNN(nn.Module):
    def __init__(self):
        super(MNIST_CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1, 1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# CLOUD ResNet (Pretrained)
class Cloud_ResNet18_Pretrained(nn.Module):
    def __init__(self, num_classes):
        super(Cloud_ResNet18_Pretrained, self).__init__()
        # Use ImageNet weights
        self.model = models.resnet18(weights='IMAGENET1K_V1')
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        return self.model(x)

# --- 3. Processing Blocks ---

# >>> BLOCK A: CIFAR-100 <<<
print("\n" + "="*40 + "\nProcessing: CIFAR-100\n" + "="*40)
il_model_cifar_path = os.path.join(checkpoint_dir, "il_model_cifar.pth")
il_map_cifar_path = os.path.join(checkpoint_dir, "il_map_cifar.npy")

il_model_cifar = VGG_Small(num_classes=100).to(device)
if os.path.exists(il_model_cifar_path):
    print("Loading existing CIFAR IL Model.")
    il_model_cifar.load_state_dict(torch.load(il_model_cifar_path, map_location=device))
else:
    print("Training CIFAR IL Model (100 epochs)...")
    train_il_model(il_model_cifar, cifar_holdout_loader_simple, cifar_test_loader, device, num_epochs=100)
    torch.save(il_model_cifar.state_dict(), il_model_cifar_path)

if os.path.exists(il_map_cifar_path):
    print("Loading existing CIFAR IL Map.")
    il_map_cifar = np.load(il_map_cifar_path)
else:
    print("Computing CIFAR IL Map...")
    il_map_cifar = compute_irreducible_loss(il_model_cifar, cifar_train_dataset, criterion_nored_cifar, device)
    np.save(il_map_cifar_path, il_map_cifar)

# >>> BLOCK B: MNIST <<<
print("\n" + "="*40 + "\nProcessing: MNIST\n" + "="*40)
il_model_mnist_path = os.path.join(checkpoint_dir, "il_model_mnist.pth")
il_map_mnist_path = os.path.join(checkpoint_dir, "il_map_mnist.npy")

il_model_mnist = MNIST_CNN().to(device)
if os.path.exists(il_model_mnist_path):
    print("Loading existing MNIST IL Model.")
    il_model_mnist.load_state_dict(torch.load(il_model_mnist_path, map_location=device))
else:
    print("Training MNIST IL Model (30 epochs)...")
    train_il_model(il_model_mnist, mnist_holdout_loader_simple, mnist_test_loader, device, num_epochs=30)
    torch.save(il_model_mnist.state_dict(), il_model_mnist_path)

if os.path.exists(il_map_mnist_path):
    print("Loading existing MNIST IL Map.")
    il_map_mnist = np.load(il_map_mnist_path)
else:
    print("Computing MNIST IL Map...")
    il_map_mnist = compute_irreducible_loss(il_model_mnist, mnist_train_dataset, criterion_nored_mnist, device)
    np.save(il_map_mnist_path, il_map_mnist)

# >>> BLOCK C: CLOUD (FIXED VERSION) <<<
print("\n" + "="*40 + "\nProcessing: CLOUD (Global Index Fix)\n" + "="*40)
il_model_cloud_path = os.path.join(checkpoint_dir, "il_model_cloud_fixed.pth")
il_map_cloud_path = os.path.join(checkpoint_dir, "il_map_cloud_fixed.npy")

# Helper to get the class count / dataset size
try:
    num_cloud_classes = len(full_cloud_data.classes)
    total_samples = len(full_cloud_data)
except NameError:
    full_cloud_data = CloudMergedDataset(CLOUD_ROOT)
    num_cloud_classes = len(full_cloud_data.classes)
    total_samples = len(full_cloud_data)

il_model_cloud = Cloud_ResNet18_Pretrained(num_classes=num_cloud_classes).to(device)

# 1. Train Pretrained IL Model
if os.path.exists(il_model_cloud_path):
    print("Loading existing FIXED Cloud IL Model.")
    il_model_cloud.load_state_dict(torch.load(il_model_cloud_path, map_location=device))
else:
    print("Training Cloud IL Model (Pretrained, 20 epochs)...")
    train_il_model(il_model_cloud, cloud_holdout_loader, cloud_test_loader, device, num_epochs=20)
    torch.save(il_model_cloud.state_dict(), il_model_cloud_path)

# 2. Compute GLOBAL IL Map
if os.path.exists(il_map_cloud_path):
    print("Loading existing FIXED Cloud IL Map.")
    il_map_cloud = np.load(il_map_cloud_path)
else:
    print(f"Computing GLOBAL Cloud IL Map (Size: {total_samples})...")
    il_map_global = np.full(total_samples, -1.0)  # -1 marks untracked indices
    il_model_cloud.eval()
    criterion_nored = nn.CrossEntropyLoss(reduction='none').to(device)
    with torch.no_grad():
        for inputs, labels, indices in tqdm(cloud_train_loader, desc="IL Mapping"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = il_model_cloud(inputs)
            losses = criterion_nored(outputs, labels)
            # Global indexing fix: scatter batch losses to their global positions
            for i, global_idx in enumerate(indices):
                il_map_global[global_idx.item()] = losses[i].item()
    valid_entries = (il_map_global != -1.0).sum()
    print(f"Map computed. Valid Training Entries: {valid_entries}/{total_samples}")
    np.save(il_map_cloud_path, il_map_global)
    il_map_cloud = il_map_global

print("\n--- ALL Pre-computation Complete. Ready for 11b. ---")
Checkpoint Directory: ./ThesisCheckpoints_v2
--- Cleaning old Cloud checkpoints... ---
Deleted outdated file: il_model_cloud.pth
Deleted outdated file: il_map_cloud.npy
Deleted outdated file: il_model_cloud_fixed.pth
Deleted outdated file: il_map_cloud_fixed.npy
--- Cleanup Complete. Starting Pre-computation. ---
========================================
Processing: CIFAR-100
========================================
Loading existing CIFAR IL Model.
Loading existing CIFAR IL Map.
========================================
Processing: MNIST
========================================
Loading existing MNIST IL Model.
Loading existing MNIST IL Map.
========================================
Processing: CLOUD (Global Index Fix)
========================================
Training Cloud IL Model (Pretrained, 20 epochs)...
--- Training IL Model on Holdout Set ---
IL Model Epoch 1/20 | Val Loss: 1.1057, Val Acc: 58.85%
IL Model Epoch 10/20 | Val Loss: 0.9543, Val Acc: 77.86%
IL Model Epoch 20/20 | Val Loss: 0.7416, Val Acc: 85.16%
--- IL Model Training Complete. Best Val Loss: 0.7416 ---
Computing GLOBAL Cloud IL Map (Size: 960)...
IL Mapping: 100%|██████████| 15/15 [00:10<00:00, 1.48it/s]
Map computed. Valid Training Entries: 480/960
--- ALL Pre-computation Complete. Ready for 11b. ---
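Before launching the long experiment loop in 11b, it is cheap to confirm that each architecture returns logits of the expected shape. A minimal sanity check: the 32x32 CIFAR and 28x28 MNIST input sizes are fixed by the classifier dimensions above, while 224x224 for the cloud images is an assumption (ResNet's adaptive pooling also accepts other sizes).
# --- (Optional sanity check) Verify model output shapes ---
with torch.no_grad():
    assert VGG_Small(num_classes=100)(torch.randn(2, 3, 32, 32)).shape == (2, 100)
    assert MNIST_CNN()(torch.randn(2, 1, 28, 28)).shape == (2, 10)
    cloud_check = Cloud_ResNet18_Pretrained(num_classes=num_cloud_classes)
    assert cloud_check(torch.randn(2, 3, 224, 224)).shape == (2, num_cloud_classes)
print("Model output shapes OK.")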
# --- 11b. Main Experiment Orchestrator (HARD MODE + CLOUD) ---
# [ REPLACES Snippet 11 ]
import os
import torch.optim as optim

# --- Settings ---
LEARNING_RATE = 0.001
NUM_EPOCHS = 100
DEBUG_EPOCHS = None

# Use the same v2 directory
try:
    import google.colab
    BASE_PATH = '/content/drive/MyDrive'
except ImportError:
    BASE_PATH = '.'

checkpoint_dir = os.path.join(BASE_PATH, 'ThesisCheckpoints_v2')
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

all_experiment_results = {}

# --- Define Experiments ---
experiments_to_run = [
    {"run_name": "Uniform SGD", "algo": "uniform_sgd"},
    {"run_name": "MKL (k=2.0)", "algo": "mkl_sgd", "k_ratio": 2.0},
    {"run_name": "MKL (k=1.5)", "algo": "mkl_sgd", "k_ratio": 1.5},
    {"run_name": "MKL (k=1.25)", "algo": "mkl_sgd", "k_ratio": 1.25},
    {"run_name": "RHO (sel 20%)", "algo": "rho_loss", "rho_ratio": 0.2},
    {"run_name": "RHO (sel 30%)", "algo": "rho_loss", "rho_ratio": 0.3},
    {"run_name": "RHO (sel 40%)", "algo": "rho_loss", "rho_ratio": 0.4}
]

# --- Helper to safely get the class count for Cloud ---
# Ensure full_cloud_data exists (from Snippet 10a)
try:
    cloud_classes = len(full_cloud_data.classes)
except NameError:
    # Fallback if the 10a cell wasn't run immediately before (though it should be)
    print("Warning: full_cloud_data not found. Attempting to reload...")
    temp_ds = CloudMergedDataset(CLOUD_ROOT, transform=None)
    cloud_classes = len(temp_ds.classes)

# --- Define Tasks (Including Cloud) ---
experiment_tasks = [
    {
        "task_name_short": "task_1",
        "task_name_long": "Task 1: CIFAR-100 (40% Label Noise)",
        "model_fn": lambda: VGG_Small(num_classes=100),
        "train_loader": cifar_train_loader,
        "test_loader": cifar_test_loader,
        "criterion": criterion_cifar,
        "criterion_nored": criterion_nored_cifar,
        "rho_il_map": il_map_cifar
    },
    {
        "task_name_short": "task_2",
        "task_name_long": "Task 2: MNIST (Input Noise std=1.5)",
        "model_fn": lambda: MNIST_CNN(),
        "train_loader": mnist_train_loader,
        "test_loader": mnist_test_loader,
        "criterion": criterion_mnist,
        "criterion_nored": criterion_nored_mnist,
        "rho_il_map": il_map_mnist
    },
    {
        "task_name_short": "task_3",
        "task_name_long": "Task 3: CLOUD (Input Noise std=1.5)",
        # Use a lambda to create a FRESH model instance for every experiment run.
        # Use the pretrained class from the master pre-computation cell, matching the IL model
        # (the earlier from-scratch Cloud_ResNet18 is superseded by the 10.c repair).
        "model_fn": lambda: Cloud_ResNet18_Pretrained(num_classes=cloud_classes),
        "train_loader": cloud_train_loader,
        "test_loader": cloud_test_loader,
        "criterion": criterion_cloud,
        "criterion_nored": criterion_nored_cloud,
        "rho_il_map": il_map_cloud
    }
]

# --- Main Loop ---
print("--- Starting HARD MODE Experiments ---")
print(f"Checkpoints will be saved to: {checkpoint_dir}")

for task in experiment_tasks:
    task_name_long = task['task_name_long']
    print("\n" + "="*50)
    print(f"STARTING: {task_name_long}")
    print("="*50)

    task_results = {}
    for exp_config in experiments_to_run:
        algo_name = exp_config['algo']
        run_name = exp_config['run_name']

        # Sanitize the run name for the checkpoint filename,
        # e.g. "RHO (sel 20%)" -> "RHO_sel_20"
        safe_run_name = run_name.replace(' ', '_').replace('(', '').replace(')', '').replace('%', '').replace('.', '_')
        chkpt_name = f"{task['task_name_short']}_{safe_run_name}.pth"
        chkpt_path = os.path.join(checkpoint_dir, chkpt_name)

        print(f"\n--- Running: {run_name} ---")

        # Instantiate a fresh model and optimizer for every run
        model = task['model_fn']().to(device)
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

        results = run_training_experiment(
            algorithm=algo_name,
            model=model,
            train_loader=task['train_loader'],
            test_loader=task['test_loader'],
            criterion=task['criterion'],
            criterion_nored=task['criterion_nored'],
            optimizer=optimizer,
            device=device,
            num_epochs=NUM_EPOCHS,
            debug_epochs=DEBUG_EPOCHS,
            checkpoint_path=chkpt_path,
            # Algorithm params
            rho_il_map=task['rho_il_map'],
            mkl_k_ratio=exp_config.get('k_ratio', 2.0),
            rho_selection_ratio=exp_config.get('rho_ratio', 0.1)
        )
        task_results[run_name] = results

    all_experiment_results[task_name_long] = task_results

print("\n" + "="*50)
print("--- ALL EXPERIMENTS COMPLETE ---")
print("="*50)
--- Starting HARD MODE Experiments ---
Checkpoints will be saved to: ./ThesisCheckpoints_v2

==================================================
STARTING: Task 1: CIFAR-100 (40% Label Noise)
==================================================
[Log condensed: all seven runs (Uniform SGD; MKL k=2.0, 1.5, 1.25; RHO sel 20%, 30%, 40%) load their checkpoints from ./ThesisCheckpoints_v2 and resume from Epoch 100, so no new training occurs.]

==================================================
STARTING: Task 2: MNIST (Input Noise std=1.5)
==================================================
[Log condensed: as for Task 1, all seven runs resume from Epoch 100 via existing checkpoints.]

==================================================
STARTING: Task 3: CLOUD (Input Noise std=1.5)
==================================================
[No checkpoints exist for this task, so every run trains from scratch for 100 epochs. Per-epoch lines are condensed below to each run's best and final epochs; best weights are saved to task_3_<run>_best.pth.]

--- Running: Uniform SGD ---
--- Starting Training: uniform_sgd ---
Epoch 23/100 | Tr Loss: 0.3763 Acc: 0.8792 | Val Loss: 2.1716 Acc: 0.5703 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_Uniform_SGD_best.pth
Epoch 100/100 | Tr Loss: 0.0408 Acc: 0.9896 | Val Loss: 12.9483 Acc: 0.1432
--- Finished: uniform_sgd ---

--- Running: MKL (k=2.0) ---
--- Starting Training: mkl_sgd ---
Epoch 70/100 | Tr Loss: 0.0145 Acc: 0.6500 | Val Loss: 5.7957 Acc: 0.5573 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_MKL_k=2_0_best.pth
Epoch 100/100 | Tr Loss: 0.1242 Acc: 0.5813 | Val Loss: 10.6332 Acc: 0.2448
--- Finished: mkl_sgd ---

--- Running: MKL (k=1.5) ---
--- Starting Training: mkl_sgd ---
Epoch 85/100 | Tr Loss: 0.0127 Acc: 0.7479 | Val Loss: 3.3089 Acc: 0.6328 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_MKL_k=1_5_best.pth
Epoch 100/100 | Tr Loss: 0.0318 Acc: 0.7375 | Val Loss: 9.4726 Acc: 0.3698
--- Finished: mkl_sgd ---

--- Running: MKL (k=1.25) ---
--- Starting Training: mkl_sgd ---
Epoch 100/100 | Tr Loss: 0.0704 Acc: 0.7896 | Val Loss: 3.1017 Acc: 0.6823 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_MKL_k=1_25_best.pth
--- Finished: mkl_sgd ---

--- Running: RHO (sel 20%) ---
--- Starting Training: rho_loss ---
Epoch 1/100 | Tr Loss: 1.6244 Acc: 0.2625 | Val Loss: 11.0136 Acc: 0.2344 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_20_best.pth
Epoch 100/100 | Tr Loss: 0.3086 Acc: 0.2687 | Val Loss: 5.6836 Acc: 0.1432
--- Finished: rho_loss ---

--- Running: RHO (sel 30%) ---
No checkpoint found. Starting from scratch.
--- Starting Training: rho_loss --- Epoch 1/100 | Tr Loss: 1.3832 Acc: 0.2479 | Val Loss: 2.5490 Acc: 0.1589 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 2/100 | Tr Loss: 0.7754 Acc: 0.2542 | Val Loss: 3.4643 Acc: 0.1432 Epoch 3/100 | Tr Loss: 0.6541 Acc: 0.2687 | Val Loss: 4.3309 Acc: 0.0286 Epoch 4/100 | Tr Loss: 0.7630 Acc: 0.2812 | Val Loss: 6.5654 Acc: 0.1432 Epoch 5/100 | Tr Loss: 0.6421 Acc: 0.2729 | Val Loss: 2.7397 Acc: 0.1615 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 6/100 | Tr Loss: 0.6038 Acc: 0.2646 | Val Loss: 2.9210 Acc: 0.1615 Epoch 7/100 | Tr Loss: 0.6463 Acc: 0.2812 | Val Loss: 2.6668 Acc: 0.1484 Epoch 8/100 | Tr Loss: 0.6705 Acc: 0.2771 | Val Loss: 2.7460 Acc: 0.1823 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 9/100 | Tr Loss: 0.5235 Acc: 0.2792 | Val Loss: 2.8991 Acc: 0.1458 Epoch 10/100 | Tr Loss: 0.5894 Acc: 0.2771 | Val Loss: 2.7960 Acc: 0.0547 Epoch 11/100 | Tr Loss: 0.7221 Acc: 0.2917 | Val Loss: 3.2434 Acc: 0.1432 Epoch 12/100 | Tr Loss: 0.6660 Acc: 0.3042 | Val Loss: 2.8762 Acc: 0.1432 Epoch 13/100 | Tr Loss: 0.5807 Acc: 0.2750 | Val Loss: 6.3132 Acc: 0.1432 Epoch 14/100 | Tr Loss: 0.6652 Acc: 0.3063 | Val Loss: 4.4293 Acc: 0.0208 Epoch 15/100 | Tr Loss: 0.6541 Acc: 0.2854 | Val Loss: 3.0252 Acc: 0.0781 Epoch 16/100 | Tr Loss: 0.5727 Acc: 0.3187 | Val Loss: 3.2477 Acc: 0.1458 Epoch 17/100 | Tr Loss: 0.6883 Acc: 0.3292 | Val Loss: 4.6879 Acc: 0.0208 Epoch 18/100 | Tr Loss: 0.5802 Acc: 0.2896 | Val Loss: 4.9017 Acc: 0.1432 Epoch 19/100 | Tr Loss: 0.6498 Acc: 0.2875 | Val Loss: 5.6755 Acc: 0.1432 Epoch 20/100 | Tr Loss: 0.5314 Acc: 0.3292 | Val Loss: 3.9027 Acc: 0.1432 Epoch 21/100 | Tr Loss: 0.6176 Acc: 0.3271 | Val Loss: 2.2885 Acc: 0.1745 Epoch 22/100 | Tr Loss: 0.5116 Acc: 0.3521 | Val Loss: 1.8478 Acc: 0.2995 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 23/100 | Tr Loss: 0.5974 Acc: 0.3167 | Val Loss: 3.3807 Acc: 0.1615 Epoch 24/100 | Tr Loss: 0.4670 Acc: 0.3063 | Val Loss: 2.2492 Acc: 0.1302 Epoch 25/100 | Tr Loss: 0.5337 Acc: 0.3187 | Val Loss: 2.6998 Acc: 0.1823 Epoch 26/100 | Tr Loss: 0.5648 Acc: 0.3583 | Val Loss: 4.2418 Acc: 0.1536 Epoch 27/100 | Tr Loss: 0.5605 Acc: 0.3792 | Val Loss: 9.4379 Acc: 0.1562 Epoch 28/100 | Tr Loss: 0.5255 Acc: 0.2833 | Val Loss: 22.3761 Acc: 0.1432 Epoch 29/100 | Tr Loss: 0.5241 Acc: 0.3125 | Val Loss: 4.6610 Acc: 0.1615 Epoch 30/100 | Tr Loss: 0.4778 Acc: 0.2958 | Val Loss: 8.8356 Acc: 0.1432 Epoch 31/100 | Tr Loss: 0.5411 Acc: 0.3396 | Val Loss: 5.4425 Acc: 0.1406 Epoch 32/100 | Tr Loss: 0.5262 Acc: 0.3688 | Val Loss: 6.2439 Acc: 0.2422 Epoch 33/100 | Tr Loss: 0.5336 Acc: 0.3458 | Val Loss: 8.7346 Acc: 0.1458 Epoch 34/100 | Tr Loss: 0.4444 Acc: 0.3333 | Val Loss: 2.9144 Acc: 0.1615 Epoch 35/100 | Tr Loss: 0.5579 Acc: 0.3417 | Val Loss: 2.6089 Acc: 0.2370 Epoch 36/100 | Tr Loss: 0.3659 Acc: 0.4042 | Val Loss: 3.1755 Acc: 0.1979 Epoch 37/100 | Tr Loss: 0.5517 Acc: 0.3812 | Val Loss: 4.6603 Acc: 0.1615 Epoch 38/100 | Tr Loss: 0.6553 Acc: 0.4229 | Val Loss: 6.4155 Acc: 0.1849 Epoch 39/100 | Tr Loss: 0.6275 Acc: 0.2771 | Val Loss: 2.6155 Acc: 0.2188 Epoch 40/100 | Tr Loss: 0.5903 Acc: 0.2854 | Val Loss: 3.6779 Acc: 0.1536 Epoch 41/100 | Tr Loss: 0.4689 Acc: 0.3083 | Val Loss: 4.4527 Acc: 0.2396 Epoch 42/100 | Tr Loss: 0.4523 Acc: 0.3458 | Val Loss: 6.7511 Acc: 0.1432 Epoch 43/100 | Tr Loss: 0.4301 Acc: 0.3521 | Val Loss: 6.6131 Acc: 0.1589 Epoch 44/100 | Tr Loss: 0.5257 Acc: 0.3521 | Val Loss: 3.3880 
Acc: 0.1562 Epoch 45/100 | Tr Loss: 0.5167 Acc: 0.4792 | Val Loss: 5.6940 Acc: 0.1589 Epoch 46/100 | Tr Loss: 0.4481 Acc: 0.4125 | Val Loss: 3.3127 Acc: 0.1536 Epoch 47/100 | Tr Loss: 0.6239 Acc: 0.3812 | Val Loss: 3.2267 Acc: 0.1354 Epoch 48/100 | Tr Loss: 0.5739 Acc: 0.4271 | Val Loss: 3.4305 Acc: 0.1484 Epoch 49/100 | Tr Loss: 0.4250 Acc: 0.3688 | Val Loss: 5.3355 Acc: 0.1536 Epoch 50/100 | Tr Loss: 0.5553 Acc: 0.3729 | Val Loss: 5.0660 Acc: 0.1432 Epoch 51/100 | Tr Loss: 0.4722 Acc: 0.4375 | Val Loss: 2.7988 Acc: 0.1719 Epoch 52/100 | Tr Loss: 0.4610 Acc: 0.3646 | Val Loss: 4.3194 Acc: 0.1562 Epoch 53/100 | Tr Loss: 0.4457 Acc: 0.4042 | Val Loss: 3.7782 Acc: 0.1146 Epoch 54/100 | Tr Loss: 0.4203 Acc: 0.3542 | Val Loss: 3.6726 Acc: 0.1536 Epoch 55/100 | Tr Loss: 0.5015 Acc: 0.3979 | Val Loss: 3.4470 Acc: 0.1536 Epoch 56/100 | Tr Loss: 0.3333 Acc: 0.4333 | Val Loss: 2.6135 Acc: 0.1615 Epoch 57/100 | Tr Loss: 0.6343 Acc: 0.3792 | Val Loss: 2.6729 Acc: 0.2318 Epoch 58/100 | Tr Loss: 0.5047 Acc: 0.4188 | Val Loss: 5.0150 Acc: 0.1797 Epoch 59/100 | Tr Loss: 0.4394 Acc: 0.4188 | Val Loss: 1.8563 Acc: 0.3073 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 60/100 | Tr Loss: 0.5841 Acc: 0.4604 | Val Loss: 2.1577 Acc: 0.2500 Epoch 61/100 | Tr Loss: 0.3730 Acc: 0.3812 | Val Loss: 1.8744 Acc: 0.3359 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 62/100 | Tr Loss: 0.3791 Acc: 0.3708 | Val Loss: 2.2084 Acc: 0.1979 Epoch 63/100 | Tr Loss: 0.4081 Acc: 0.4458 | Val Loss: 2.5347 Acc: 0.2083 Epoch 64/100 | Tr Loss: 0.4486 Acc: 0.4042 | Val Loss: 2.3517 Acc: 0.2057 Epoch 65/100 | Tr Loss: 0.4616 Acc: 0.4313 | Val Loss: 1.8422 Acc: 0.2760 Epoch 66/100 | Tr Loss: 0.3906 Acc: 0.4375 | Val Loss: 2.3311 Acc: 0.2083 Epoch 67/100 | Tr Loss: 0.6201 Acc: 0.4833 | Val Loss: 2.8653 Acc: 0.1667 Epoch 68/100 | Tr Loss: 0.4827 Acc: 0.3688 | Val Loss: 2.5601 Acc: 0.2135 Epoch 69/100 | Tr Loss: 0.3951 Acc: 0.4854 | Val Loss: 2.2267 Acc: 0.2760 Epoch 70/100 | Tr Loss: 0.4826 Acc: 0.5146 | Val Loss: 4.2468 Acc: 0.2083 Epoch 71/100 | Tr Loss: 0.4134 Acc: 0.4500 | Val Loss: 3.3681 Acc: 0.2292 Epoch 72/100 | Tr Loss: 0.3507 Acc: 0.4646 | Val Loss: 2.3359 Acc: 0.2214 Epoch 73/100 | Tr Loss: 0.6017 Acc: 0.4292 | Val Loss: 2.9248 Acc: 0.2448 Epoch 74/100 | Tr Loss: 0.3836 Acc: 0.4729 | Val Loss: 1.6688 Acc: 0.4349 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_30_best.pth Epoch 75/100 | Tr Loss: 0.5052 Acc: 0.4062 | Val Loss: 2.5451 Acc: 0.1901 Epoch 76/100 | Tr Loss: 0.3987 Acc: 0.4583 | Val Loss: 4.3222 Acc: 0.1667 Epoch 77/100 | Tr Loss: 0.3215 Acc: 0.4688 | Val Loss: 3.5808 Acc: 0.2031 Epoch 78/100 | Tr Loss: 0.1968 Acc: 0.4417 | Val Loss: 3.9575 Acc: 0.2135 Epoch 79/100 | Tr Loss: 0.4945 Acc: 0.4271 | Val Loss: 5.8707 Acc: 0.1719 Epoch 80/100 | Tr Loss: 0.4726 Acc: 0.5354 | Val Loss: 2.8680 Acc: 0.2344 Epoch 81/100 | Tr Loss: 0.4134 Acc: 0.4938 | Val Loss: 1.7948 Acc: 0.3490 Epoch 82/100 | Tr Loss: 0.3880 Acc: 0.4250 | Val Loss: 1.5150 Acc: 0.3880 Epoch 83/100 | Tr Loss: 0.3600 Acc: 0.5250 | Val Loss: 3.7777 Acc: 0.2292 Epoch 84/100 | Tr Loss: 0.4639 Acc: 0.4604 | Val Loss: 2.3151 Acc: 0.3464 Epoch 85/100 | Tr Loss: 0.3509 Acc: 0.4604 | Val Loss: 2.0999 Acc: 0.3802 Epoch 86/100 | Tr Loss: 0.4485 Acc: 0.5021 | Val Loss: 2.0577 Acc: 0.3203 Epoch 87/100 | Tr Loss: 0.4348 Acc: 0.5021 | Val Loss: 1.8667 Acc: 0.3464 Epoch 88/100 | Tr Loss: 0.3215 Acc: 0.4854 | Val Loss: 3.6299 Acc: 0.2448 Epoch 89/100 | Tr Loss: 0.3172 Acc: 0.4792 | Val Loss: 2.2929 Acc: 0.2917 
Epoch 90/100 | Tr Loss: 0.4715 Acc: 0.4896 | Val Loss: 2.7804 Acc: 0.2526 Epoch 91/100 | Tr Loss: 0.2594 Acc: 0.5375 | Val Loss: 2.1282 Acc: 0.2578 Epoch 92/100 | Tr Loss: 0.3199 Acc: 0.4437 | Val Loss: 1.9263 Acc: 0.4245 Epoch 93/100 | Tr Loss: 0.3841 Acc: 0.4771 | Val Loss: 1.7225 Acc: 0.3359 Epoch 94/100 | Tr Loss: 0.4585 Acc: 0.5771 | Val Loss: 2.3525 Acc: 0.2526 Epoch 95/100 | Tr Loss: 0.3968 Acc: 0.5125 | Val Loss: 3.8960 Acc: 0.1979 Epoch 96/100 | Tr Loss: 0.3709 Acc: 0.5146 | Val Loss: 2.8655 Acc: 0.2812 Epoch 97/100 | Tr Loss: 0.3673 Acc: 0.4792 | Val Loss: 4.0616 Acc: 0.1953 Epoch 98/100 | Tr Loss: 0.4445 Acc: 0.4646 | Val Loss: 3.8115 Acc: 0.1589 Epoch 99/100 | Tr Loss: 0.4006 Acc: 0.5083 | Val Loss: 2.5046 Acc: 0.3021 Epoch 100/100 | Tr Loss: 0.2830 Acc: 0.5437 | Val Loss: 1.7136 Acc: 0.3776 --- Finished: rho_loss --- --- Running: RHO (sel 40%) --- No checkpoint found. Starting from scratch. --- Starting Training: rho_loss --- Epoch 1/100 | Tr Loss: 1.6691 Acc: 0.3271 | Val Loss: 5.3726 Acc: 0.2292 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 2/100 | Tr Loss: 1.2361 Acc: 0.3187 | Val Loss: 5.7582 Acc: 0.0208 Epoch 3/100 | Tr Loss: 0.8668 Acc: 0.3646 | Val Loss: 3.4620 Acc: 0.0807 Epoch 4/100 | Tr Loss: 0.8449 Acc: 0.4146 | Val Loss: 2.0877 Acc: 0.2682 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 5/100 | Tr Loss: 0.8685 Acc: 0.3937 | Val Loss: 2.5385 Acc: 0.2266 Epoch 6/100 | Tr Loss: 0.8418 Acc: 0.4562 | Val Loss: 2.3615 Acc: 0.2370 Epoch 7/100 | Tr Loss: 0.7841 Acc: 0.3667 | Val Loss: 2.0765 Acc: 0.2396 Epoch 8/100 | Tr Loss: 0.8801 Acc: 0.4042 | Val Loss: 1.8319 Acc: 0.2344 Epoch 9/100 | Tr Loss: 0.9009 Acc: 0.4333 | Val Loss: 1.8933 Acc: 0.2422 Epoch 10/100 | Tr Loss: 0.7326 Acc: 0.5125 | Val Loss: 2.3779 Acc: 0.2344 Epoch 11/100 | Tr Loss: 0.6914 Acc: 0.5000 | Val Loss: 2.1683 Acc: 0.2422 Epoch 12/100 | Tr Loss: 0.5724 Acc: 0.5646 | Val Loss: 3.1603 Acc: 0.1771 Epoch 13/100 | Tr Loss: 0.8815 Acc: 0.4813 | Val Loss: 3.0094 Acc: 0.1901 Epoch 14/100 | Tr Loss: 0.7775 Acc: 0.4083 | Val Loss: 3.1232 Acc: 0.1536 Epoch 15/100 | Tr Loss: 0.7116 Acc: 0.3771 | Val Loss: 1.9308 Acc: 0.2500 Epoch 16/100 | Tr Loss: 0.6635 Acc: 0.4792 | Val Loss: 2.0380 Acc: 0.2240 Epoch 17/100 | Tr Loss: 0.6395 Acc: 0.5083 | Val Loss: 2.1177 Acc: 0.1901 Epoch 18/100 | Tr Loss: 0.5978 Acc: 0.5083 | Val Loss: 2.1279 Acc: 0.2812 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 19/100 | Tr Loss: 0.5170 Acc: 0.5292 | Val Loss: 2.3173 Acc: 0.2422 Epoch 20/100 | Tr Loss: 0.5139 Acc: 0.5583 | Val Loss: 2.4758 Acc: 0.2917 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 21/100 | Tr Loss: 0.6327 Acc: 0.5729 | Val Loss: 1.9158 Acc: 0.2630 Epoch 22/100 | Tr Loss: 0.6998 Acc: 0.5938 | Val Loss: 2.8893 Acc: 0.1458 Epoch 23/100 | Tr Loss: 0.7635 Acc: 0.6146 | Val Loss: 4.3203 Acc: 0.2344 Epoch 24/100 | Tr Loss: 0.6620 Acc: 0.4833 | Val Loss: 4.9831 Acc: 0.2344 Epoch 25/100 | Tr Loss: 0.6302 Acc: 0.5771 | Val Loss: 2.6937 Acc: 0.2448 Epoch 26/100 | Tr Loss: 0.4939 Acc: 0.5771 | Val Loss: 2.2598 Acc: 0.2292 Epoch 27/100 | Tr Loss: 0.5532 Acc: 0.5813 | Val Loss: 2.0639 Acc: 0.2031 Epoch 28/100 | Tr Loss: 0.5999 Acc: 0.6167 | Val Loss: 2.3303 Acc: 0.3203 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 29/100 | Tr Loss: 0.6730 Acc: 0.5646 | Val Loss: 2.6305 Acc: 0.2500 Epoch 30/100 | Tr Loss: 0.6242 Acc: 0.5646 | Val Loss: 3.1896 Acc: 0.2031 Epoch 31/100 | Tr Loss: 0.4137 Acc: 
0.6208 | Val Loss: 3.0892 Acc: 0.1849 Epoch 32/100 | Tr Loss: 0.6432 Acc: 0.5875 | Val Loss: 3.1607 Acc: 0.2214 Epoch 33/100 | Tr Loss: 0.5671 Acc: 0.5667 | Val Loss: 3.7295 Acc: 0.1589 Epoch 34/100 | Tr Loss: 0.4384 Acc: 0.6104 | Val Loss: 6.2860 Acc: 0.1693 Epoch 35/100 | Tr Loss: 0.3656 Acc: 0.5917 | Val Loss: 3.1601 Acc: 0.2344 Epoch 36/100 | Tr Loss: 0.5427 Acc: 0.6229 | Val Loss: 1.9797 Acc: 0.3958 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 37/100 | Tr Loss: 0.4403 Acc: 0.5813 | Val Loss: 2.8979 Acc: 0.2474 Epoch 38/100 | Tr Loss: 0.4098 Acc: 0.6500 | Val Loss: 3.3644 Acc: 0.1641 Epoch 39/100 | Tr Loss: 0.2696 Acc: 0.7000 | Val Loss: 3.3768 Acc: 0.1927 Epoch 40/100 | Tr Loss: 0.4855 Acc: 0.6813 | Val Loss: 3.8035 Acc: 0.2422 Epoch 41/100 | Tr Loss: 0.5200 Acc: 0.6375 | Val Loss: 1.4977 Acc: 0.4167 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 42/100 | Tr Loss: 0.6312 Acc: 0.4625 | Val Loss: 1.6192 Acc: 0.4062 Epoch 43/100 | Tr Loss: 0.4665 Acc: 0.6646 | Val Loss: 2.5312 Acc: 0.2865 Epoch 44/100 | Tr Loss: 0.4774 Acc: 0.6396 | Val Loss: 2.1439 Acc: 0.3490 Epoch 45/100 | Tr Loss: 0.4851 Acc: 0.6604 | Val Loss: 2.2898 Acc: 0.3151 Epoch 46/100 | Tr Loss: 0.3433 Acc: 0.6854 | Val Loss: 2.5415 Acc: 0.3646 Epoch 47/100 | Tr Loss: 0.4426 Acc: 0.6708 | Val Loss: 2.1758 Acc: 0.3411 Epoch 48/100 | Tr Loss: 0.4119 Acc: 0.6958 | Val Loss: 2.3529 Acc: 0.3516 Epoch 49/100 | Tr Loss: 0.3665 Acc: 0.6583 | Val Loss: 2.6814 Acc: 0.2604 Epoch 50/100 | Tr Loss: 0.3932 Acc: 0.7063 | Val Loss: 2.2857 Acc: 0.3464 Epoch 51/100 | Tr Loss: 0.3720 Acc: 0.7188 | Val Loss: 4.6907 Acc: 0.2812 Epoch 52/100 | Tr Loss: 0.3325 Acc: 0.6646 | Val Loss: 4.1628 Acc: 0.2917 Epoch 53/100 | Tr Loss: 0.3494 Acc: 0.7271 | Val Loss: 2.3357 Acc: 0.3151 Epoch 54/100 | Tr Loss: 0.3099 Acc: 0.7104 | Val Loss: 1.9740 Acc: 0.4323 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 55/100 | Tr Loss: 0.4239 Acc: 0.7188 | Val Loss: 2.5242 Acc: 0.3672 Epoch 56/100 | Tr Loss: 0.5033 Acc: 0.7771 | Val Loss: 2.5776 Acc: 0.3594 Epoch 57/100 | Tr Loss: 0.4051 Acc: 0.6625 | Val Loss: 2.3907 Acc: 0.2812 Epoch 58/100 | Tr Loss: 0.3065 Acc: 0.7146 | Val Loss: 1.7399 Acc: 0.3385 Epoch 59/100 | Tr Loss: 0.3072 Acc: 0.7000 | Val Loss: 4.3044 Acc: 0.2760 Epoch 60/100 | Tr Loss: 0.2746 Acc: 0.7458 | Val Loss: 2.6782 Acc: 0.3021 Epoch 61/100 | Tr Loss: 0.3515 Acc: 0.7729 | Val Loss: 1.5162 Acc: 0.4271 Epoch 62/100 | Tr Loss: 0.4431 Acc: 0.7333 | Val Loss: 2.3518 Acc: 0.3125 Epoch 63/100 | Tr Loss: 0.3087 Acc: 0.7604 | Val Loss: 3.4168 Acc: 0.2448 Epoch 64/100 | Tr Loss: 0.4483 Acc: 0.6896 | Val Loss: 7.3447 Acc: 0.1693 Epoch 65/100 | Tr Loss: 0.2701 Acc: 0.7458 | Val Loss: 8.0611 Acc: 0.1927 Epoch 66/100 | Tr Loss: 0.5225 Acc: 0.7479 | Val Loss: 4.2549 Acc: 0.3568 Epoch 67/100 | Tr Loss: 0.4774 Acc: 0.6646 | Val Loss: 4.9767 Acc: 0.2135 Epoch 68/100 | Tr Loss: 0.3784 Acc: 0.6917 | Val Loss: 3.1985 Acc: 0.2214 Epoch 69/100 | Tr Loss: 0.4613 Acc: 0.7083 | Val Loss: 4.1890 Acc: 0.2240 Epoch 70/100 | Tr Loss: 0.3093 Acc: 0.7583 | Val Loss: 5.9497 Acc: 0.1641 Epoch 71/100 | Tr Loss: 0.3265 Acc: 0.6958 | Val Loss: 7.3932 Acc: 0.1615 Epoch 72/100 | Tr Loss: 0.2899 Acc: 0.7292 | Val Loss: 9.7143 Acc: 0.1875 Epoch 73/100 | Tr Loss: 0.3196 Acc: 0.7729 | Val Loss: 8.2326 Acc: 0.2057 Epoch 74/100 | Tr Loss: 0.3063 Acc: 0.7250 | Val Loss: 11.7729 Acc: 0.1667 Epoch 75/100 | Tr Loss: 0.2036 Acc: 0.7688 | Val Loss: 9.0559 Acc: 0.1797 Epoch 76/100 | Tr Loss: 0.3679 Acc: 0.7542 | Val 
Loss: 7.8899 Acc: 0.2214 Epoch 77/100 | Tr Loss: 0.2846 Acc: 0.7688 | Val Loss: 4.7081 Acc: 0.3229 Epoch 78/100 | Tr Loss: 0.2532 Acc: 0.7958 | Val Loss: 2.4601 Acc: 0.4245 Epoch 79/100 | Tr Loss: 0.3616 Acc: 0.7396 | Val Loss: 2.6147 Acc: 0.4010 Epoch 80/100 | Tr Loss: 0.4682 Acc: 0.6854 | Val Loss: 3.9883 Acc: 0.3828 Epoch 81/100 | Tr Loss: 0.3692 Acc: 0.6896 | Val Loss: 1.9320 Acc: 0.4609 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 82/100 | Tr Loss: 0.4935 Acc: 0.7000 | Val Loss: 1.2229 Acc: 0.5417 [New Best] Saved to ./ThesisCheckpoints_v2/task_3_RHO_sel_40_best.pth Epoch 83/100 | Tr Loss: 0.5531 Acc: 0.7417 | Val Loss: 1.4603 Acc: 0.4740 Epoch 84/100 | Tr Loss: 0.4700 Acc: 0.6562 | Val Loss: 1.4210 Acc: 0.4557 Epoch 85/100 | Tr Loss: 0.3229 Acc: 0.7083 | Val Loss: 2.4960 Acc: 0.3151 Epoch 86/100 | Tr Loss: 0.3216 Acc: 0.7312 | Val Loss: 5.4260 Acc: 0.1745 Epoch 87/100 | Tr Loss: 0.3210 Acc: 0.7292 | Val Loss: 2.2066 Acc: 0.3490 Epoch 88/100 | Tr Loss: 0.3095 Acc: 0.6917 | Val Loss: 2.1696 Acc: 0.3776 Epoch 89/100 | Tr Loss: 0.2447 Acc: 0.7604 | Val Loss: 1.7482 Acc: 0.4635 Epoch 90/100 | Tr Loss: 0.1515 Acc: 0.7979 | Val Loss: 1.8998 Acc: 0.4297 Epoch 91/100 | Tr Loss: 0.2392 Acc: 0.7396 | Val Loss: 1.9821 Acc: 0.4557 Epoch 92/100 | Tr Loss: 0.2265 Acc: 0.8208 | Val Loss: 2.0508 Acc: 0.4531 Epoch 93/100 | Tr Loss: 0.1961 Acc: 0.8021 | Val Loss: 2.6084 Acc: 0.3880 Epoch 94/100 | Tr Loss: 0.2815 Acc: 0.7562 | Val Loss: 2.6312 Acc: 0.3464 Epoch 95/100 | Tr Loss: 0.2621 Acc: 0.8187 | Val Loss: 1.7283 Acc: 0.4557 Epoch 96/100 | Tr Loss: 0.3213 Acc: 0.7875 | Val Loss: 4.4963 Acc: 0.2839 Epoch 97/100 | Tr Loss: 0.2978 Acc: 0.8250 | Val Loss: 2.8065 Acc: 0.3672 Epoch 98/100 | Tr Loss: 0.1793 Acc: 0.8063 | Val Loss: 1.2980 Acc: 0.5208 Epoch 99/100 | Tr Loss: 0.3461 Acc: 0.7521 | Val Loss: 1.7552 Acc: 0.4193 Epoch 100/100 | Tr Loss: 0.2158 Acc: 0.8063 | Val Loss: 2.4602 Acc: 0.3932 --- Finished: rho_loss --- ================================================== --- ALL EXPERIMENTS COMPLETE --- ==================================================
8. Results Visualization & Comparison¶
After the full training runs are complete, all results are collected in the nested all_experiment_results dictionary: one entry per task, and inside it one metrics record per algorithm (the Uniform SGD baseline, the MKL sweep, and the RHO sweep).
This section provides helper functions to analyze and visualize this data. We use matplotlib to plot the learning curves and pandas to build a clean summary table of the final/best performance metrics.
- plot_task_history: creates side-by-side plots of validation loss and validation accuracy, overlaying all algorithms for a task.
- display_summary_table: extracts the single most important metric, the best validation accuracy, together with the epoch at which it was achieved, and puts it into a table for easy comparison.
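To make the expected data layout concrete, here is a minimal sketch of the nested structure these helpers consume. The metric values below are invented purely for illustration; only the key names match what the training loop records.
# Illustrative only: invented numbers showing the expected nesting.
# Outer key: task name -> inner key: algorithm name -> per-epoch metric lists.
example_results = {
    'Task 1: CIFAR-100 (40% Label Noise)': {
        'Uniform SGD': {
            'train_loss': [4.2, 3.8, 3.5],
            'train_acc':  [0.05, 0.09, 0.12],
            'val_loss':   [4.0, 3.7, 3.6],
            'val_acc':    [0.07, 0.11, 0.13],
        },
    },
}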
# --- 12. Visualization Functions (Updated for Dictionary Structure) ---
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from IPython.display import display  # Needed to show tables in loops

plt.style.use('ggplot')

def plot_task_history(results_dict, task_title):
    """
    Plots Train/Val Loss and Accuracy for a single task.
    Args:
        results_dict: dict { 'AlgoName': {'train_loss':[], ...}, ... }
        task_title: str
    """
    # Create subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 7))
    # Color palette to ensure distinct lines
    colors = plt.cm.tab10(np.linspace(0, 1, len(results_dict)))
    for (name, metrics), color in zip(results_dict.items(), colors):
        epochs = range(1, len(metrics['val_loss']) + 1)
        # --- Plot 1: Validation Loss ---
        # We focus on Validation Loss for comparison
        ax1.plot(epochs, metrics['val_loss'], label=name, linewidth=2.5, color=color)
        # Optional: Plot Train loss faintly
        # ax1.plot(epochs, metrics['train_loss'], linestyle=':', linewidth=1, alpha=0.5, color=color)
        # --- Plot 2: Validation Accuracy ---
        val_acc_pct = [x * 100 for x in metrics['val_acc']]
        ax2.plot(epochs, val_acc_pct, label=name, linewidth=2.5, color=color)
    # Styling Plot 1 (Loss)
    ax1.set_title(f"{task_title} - Validation Loss (Lower is Better)", fontsize=14)
    ax1.set_xlabel("Epochs", fontsize=12)
    ax1.set_ylabel("Loss", fontsize=12)
    ax1.legend()
    ax1.grid(True)
    # Styling Plot 2 (Accuracy)
    ax2.set_title(f"{task_title} - Validation Accuracy (Higher is Better)", fontsize=14)
    ax2.set_xlabel("Epochs", fontsize=12)
    ax2.set_ylabel("Accuracy (%)", fontsize=12)
    ax2.legend()
    ax2.grid(True)
    plt.tight_layout()
    plt.show()

def display_summary_table(results_dict, task_title):
    """
    Creates and displays a summary table for a task.
    """
    data = []
    for name, metrics in results_dict.items():
        # Calculate metrics
        best_val_acc = max(metrics['val_acc']) * 100
        best_epoch = np.argmax(metrics['val_acc']) + 1
        final_val_acc = metrics['val_acc'][-1] * 100
        final_train_acc = metrics['train_acc'][-1] * 100
        # Generalization gap
        gen_gap = final_train_acc - final_val_acc
        data.append({
            'Algorithm': name,
            'Best Val Acc (%)': best_val_acc,
            'Best Epoch': best_epoch,
            'Final Val Acc (%)': final_val_acc,
            'Train-Val Gap (%)': gen_gap
        })
    df = pd.DataFrame(data)
    df = df.sort_values(by='Best Val Acc (%)', ascending=False)  # Sort by best performance
    print(f"\n>>> Summary Table: {task_title}")
    # Style the table
    styled_df = df.style.format({
        'Best Val Acc (%)': '{:.2f}',
        'Final Val Acc (%)': '{:.2f}',
        'Train-Val Gap (%)': '{:.2f}'
    }).background_gradient(subset=['Best Val Acc (%)'], cmap='Greens')
    display(styled_df)

def visualize_all_experiments(all_results):
    """
    Main function to visualize everything in the results dictionary.
    """
    if not all_results:
        print("No results to visualize!")
        return
    for task_name, algorithm_results in all_results.items():
        print("\n" + "#"*60)
        print(f"VISUALIZING: {task_name}")
        print("#"*60)
        # 1. Plot Curves
        plot_task_history(algorithm_results, task_name)
        # 2. Show Table
        display_summary_table(algorithm_results, task_name)

# --- EXECUTE VISUALIZATION ---
# Automatically runs on the 'all_experiment_results' variable from Snippet 11b
visualize_all_experiments(all_experiment_results)
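While the full training runs are still in progress, both helpers can be smoke-tested on a stand-in dictionary such as the example_results sketch above:
# Hypothetical smoke test: exercises the plotting and table code on dummy data
# before committing to the multi-hour experiment runs.
visualize_all_experiments(example_results)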
# --- 12b. Visualization Functions (Updated & Wrapped) ---
# [ This REPLACES your old Snippet 12 ]
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from IPython.display import display

plt.style.use('ggplot')  # Use a nice style for the plots

def plot_results_custom(all_results, algorithm_names, title_prefix=""):
    """
    Plots the training/validation loss and accuracy for a custom list of experiments.
    """
    if not all_results:
        print(f"Skipping plot '{title_prefix}': No results provided.")
        return
    # Find the shortest number of epochs in case one run was cut short
    num_epochs = min(len(res['val_loss']) for res in all_results)
    if num_epochs == 0:
        print(f"Skipping plot '{title_prefix}': No epoch data found (all runs may have been skipped).")
        return
    epochs = range(1, num_epochs + 1)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6))  # Wider figure
    # --- Plot 1: Loss ---
    ax1.set_title(f"{title_prefix} - Validation Loss")
    ax1.set_xlabel("Epochs")
    ax1.set_ylabel("Loss")
    for results, name in zip(all_results, algorithm_names):
        # Plot Validation Loss (Solid Line)
        ax1.plot(epochs, results['val_loss'][:num_epochs], label=f'{name}', linewidth=2)
    ax1.legend()
    ax1.grid(True)
    # --- Plot 2: Accuracy ---
    ax2.set_title(f"{title_prefix} - Validation Accuracy")
    ax2.set_xlabel("Epochs")
    ax2.set_ylabel("Accuracy (%)")
    for results, name in zip(all_results, algorithm_names):
        # Plot Validation Accuracy (Solid Line)
        val_acc_percent = [acc * 100 for acc in results['val_acc'][:num_epochs]]
        ax2.plot(epochs, val_acc_percent, label=f'{name}', linewidth=2)
    ax2.legend()
    ax2.grid(True)
    plt.tight_layout()
    plt.show()

def create_summary_table(all_results, algorithm_names):
    """
    Creates a pandas DataFrame to summarize the key metrics.
    """
    summary_data = {
        'Algorithm': [],
        'Best Val Acc (%)': [],
        'Epoch': [],
        'Final Val Acc (%)': [],
        'Final Train Acc (%)': []
    }
    for results, name in zip(all_results, algorithm_names):
        if not results.get('val_acc'):  # Skip if no data
            continue
        # Find best validation accuracy and its epoch
        best_val_acc = max(results['val_acc']) * 100
        best_epoch = np.argmax(results['val_acc']) + 1  # +1 for 1-based epoch
        # Get final metrics
        final_val_acc = results['val_acc'][-1] * 100
        final_train_acc = results['train_acc'][-1] * 100
        summary_data['Algorithm'].append(name)
        summary_data['Best Val Acc (%)'].append(best_val_acc)
        summary_data['Epoch'].append(best_epoch)
        summary_data['Final Val Acc (%)'].append(final_val_acc)
        summary_data['Final Train Acc (%)'].append(final_train_acc)
    df = pd.DataFrame(summary_data)
    if df.empty:
        print("Summary table is empty.")
        return None
    # Format for better readability
    return df.set_index('Algorithm').style.format({
        'Best Val Acc (%)': '{:.2f}',
        'Final Val Acc (%)': '{:.2f}',
        'Final Train Acc (%)': '{:.2f}'
    }).highlight_max(subset=['Best Val Acc (%)'], color='lightgreen')

# --- Wrapper to Run on Dictionary Data ---
def visualize_all_tasks(experiment_data_dict):
    """
    Iterates through the experiment dictionary and calls the plotting functions.
    """
    if not experiment_data_dict:
        print("No experiment data found to visualize.")
        return
    for task_name, runs_dict in experiment_data_dict.items():
        print("\n" + "#"*80)
        print(f"VISUALIZATION: {task_name}")
        print("#"*80)
        # Convert dictionary to lists for the plotting functions.
        # Sort keys alphabetically so plots are consistent across tasks
        # (with our naming this puts MKL first, then RHO, then Uniform).
        sorted_keys = sorted(runs_dict.keys())
        results_list = [runs_dict[algo_name] for algo_name in sorted_keys]
        names_list = list(sorted_keys)
        # 1. Plot Curves
        plot_results_custom(results_list, names_list, title_prefix=task_name)
        # 2. Show Summary Table
        table = create_summary_table(results_list, names_list)
        if table is not None:
            print(f"\n>>> Summary Table: {task_name}")
            display(table)

# --- EXECUTE ---
# Run visualization on the results from Snippet 11b
visualize_all_tasks(all_experiment_results)
################################################################################ VISUALIZATION: Task 1: CIFAR-100 (40% Label Noise) ################################################################################
>>> Summary Table: Task 1: CIFAR-100 (40% Label Noise)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| MKL (k=1.25) | 47.69 | 97 | 45.71 | 26.74 |
| MKL (k=1.5) | 48.63 | 99 | 48.60 | 29.02 |
| MKL (k=2.0) | 41.86 | 99 | 41.58 | 23.94 |
| RHO (sel 20%) | 39.37 | 99 | 39.32 | 20.50 |
| RHO (sel 30%) | 47.66 | 97 | 46.70 | 27.08 |
| RHO (sel 40%) | 48.90 | 100 | 48.90 | 28.39 |
| Uniform SGD | 46.06 | 97 | 45.35 | 26.68 |
################################################################################ VISUALIZATION: Task 2: MNIST (Input Noise std=1.5) ################################################################################
>>> Summary Table: Task 2: MNIST (Input Noise std=1.5)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| MKL (k=1.25) | 96.08 | 64 | 95.28 | 91.16 |
| MKL (k=1.5) | 93.64 | 83 | 92.53 | 86.23 |
| MKL (k=2.0) | 83.18 | 61 | 81.99 | 77.67 |
| RHO (sel 20%) | 95.94 | 92 | 94.94 | 92.36 |
| RHO (sel 30%) | 96.70 | 52 | 94.66 | 93.06 |
| RHO (sel 40%) | 96.95 | 92 | 95.78 | 92.79 |
| Uniform SGD | 99.18 | 71 | 99.03 | 98.98 |
################################################################################ VISUALIZATION: Task 3: CLOUD (Input Noise std=1.5) ################################################################################
>>> Summary Table: Task 3: CLOUD (Input Noise std=1.5)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| MKL (k=1.25) | 68.23 | 100 | 68.23 | 78.96 |
| MKL (k=1.5) | 63.28 | 85 | 36.98 | 73.75 |
| MKL (k=2.0) | 55.73 | 70 | 24.48 | 58.13 |
| RHO (sel 20%) | 23.44 | 1 | 14.32 | 26.88 |
| RHO (sel 30%) | 43.49 | 74 | 37.76 | 54.37 |
| RHO (sel 40%) | 54.17 | 82 | 39.32 | 80.62 |
| Uniform SGD | 57.03 | 23 | 14.32 | 98.96 |
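Since these summary tables will likely end up in the thesis, it can be convenient to export them as LaTeX. The helper below is a minimal sketch (summary_to_latex and the output path are placeholder names, not part of the notebook); it rebuilds a plain DataFrame from the same inputs that create_summary_table takes and calls pandas' DataFrame.to_latex:
import numpy as np
import pandas as pd

def summary_to_latex(all_results, algorithm_names, path='summary.tex'):
    # Rebuild a plain DataFrame (the Styler above is for notebook display only).
    rows = []
    for results, name in zip(all_results, algorithm_names):
        if not results.get('val_acc'):
            continue
        rows.append({
            'Algorithm': name,
            'Best Val Acc (%)': max(results['val_acc']) * 100,
            'Epoch': int(np.argmax(results['val_acc'])) + 1,
            'Final Val Acc (%)': results['val_acc'][-1] * 100,
        })
    if not rows:
        return None
    df = pd.DataFrame(rows).set_index('Algorithm')
    # Write a LaTeX tabular with two-decimal floats.
    with open(path, 'w') as f:
        f.write(df.to_latex(float_format='%.2f'))
    return df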
9. Run Analysis¶
After the main experiment cell (Snippet 11) has finished a full run with debug mode disabled, the all_experiment_results dictionary will be populated.
Run the cell below to automatically generate the comparison plots and summary tables.
# --- 13. Run Analysis (All Experiments) ---
#
# !! RUN THIS CELL AFTER SNIPPET 11 IS DONE !!
#
print("--- Generating Full Analysis for All Tasks ---")

# Loop through each task we completed
for task_name, task_results_dict in all_experiment_results.items():
    print("\n" + "#"*50)
    print(f"# ANALYSIS FOR: {task_name}")
    print("#"*50)
    # 1. Extract the results and names for this task
    # (task_results_dict is e.g., {'uniform_sgd': {...}, 'mkl_sgd': {...}, ...})
    results_list = list(task_results_dict.values())
    algo_names = list(task_results_dict.keys())
    # 2. Generate the plots
    # (Uses the plot_results_custom function from Snippet 12b)
    plot_results_custom(
        results_list,
        algo_names,
        title_prefix=task_name
    )
    # 3. Generate and display the summary table
    # (Uses the create_summary_table function from Snippet 12b)
    summary_df = create_summary_table(results_list, algo_names)
    if summary_df is not None:
        print(f"\n--- {task_name} Summary Table ---")
        display(summary_df)
    print("\n\n")
--- Generating Full Analysis for All Tasks --- ################################################## # ANALYSIS FOR: Task 1: CIFAR-100 (Label Noise) ##################################################
--- Task 1: CIFAR-100 (Label Noise) Summary Table ---
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| uniform_sgd | 55.63 | 99 | 55.11 | 44.84 |
| mkl_sgd | 44.33 | 99 | 43.19 | 39.15 |
| rho_loss | 41.24 | 94 | 40.48 | 29.34 |
################################################## # ANALYSIS FOR: Task 2: MNIST (Input Noise) ##################################################
--- Task 2: MNIST (Input Noise) Summary Table ---
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| uniform_sgd | 99.34 | 24 | 99.10 | 99.94 |
| mkl_sgd | 67.11 | 28 | 65.60 | 64.00 |
| rho_loss | 99.17 | 87 | 99.03 | 99.55 |
# --- 13b. Run Analysis (Hard Mode - Updated) ---
from IPython.display import display

# --- Helper: Find Best Run ---
def find_best_run(task_results_dict, prefix):
    """
    Finds the best performing run from a sweep based on max validation accuracy.
    Returns: (name_string, results_dict)
    """
    best_acc = -1
    best_name = None
    best_results = None
    # Iterate through all runs in this task
    for run_name, results in task_results_dict.items():
        # Check if run matches the prefix (e.g., "MKL")
        if run_name.lower().startswith(prefix.lower()) and results.get('val_acc'):
            max_acc = max(results['val_acc'])
            if max_acc > best_acc:
                best_acc = max_acc
                best_name = run_name
                best_results = results
    if best_name:
        # Return the specific best run
        return f"{best_name} (Best)", best_results
    else:
        # Fallback: return None if not found
        return f"{prefix} (Not Found)", None

print("--- Generating Detailed Analysis for HARD MODE ---")
for task_name, task_results in all_experiment_results.items():
    print("\n" + "#"*80)
    print(f"# ANALYSIS FOR: {task_name}")
    print("#"*80)
    all_names = list(task_results.keys())
    all_results_list = list(task_results.values())
    # 1. Overall Summary Table
    print(f"\n>>> Full Summary Table: {task_name}")
    summary_df = create_summary_table(all_results_list, all_names)
    if summary_df is not None:
        display(summary_df)
    # 2. Plot: Best Uniform vs. Best MKL vs. Best RHO
    # This is the "money plot" for the thesis
    print(f"\n>>> Comparative Plot: Best Performers ({task_name})")
    name_uni, res_uni = find_best_run(task_results, "Uniform")
    name_mkl, res_mkl = find_best_run(task_results, "MKL")
    name_rho, res_rho = find_best_run(task_results, "RHO")
    # Filter out Nones (in case an algorithm wasn't run)
    best_runs = []
    best_names = []
    if res_uni:
        best_runs.append(res_uni)
        best_names.append(name_uni)
    if res_mkl:
        best_runs.append(res_mkl)
        best_names.append(name_mkl)
    if res_rho:
        best_runs.append(res_rho)
        best_names.append(name_rho)
    if best_runs:
        plot_results_custom(
            best_runs,
            best_names,
            title_prefix=f"{task_name} - Best vs Best"
        )
    else:
        print("Not enough data for comparison plot.")
    # 3. Parameter Sweep Plots
    # A. MKL Sweep
    mkl_names = sorted([n for n in all_names if "MKL" in n])
    if mkl_names:
        print(f"\n>>> Parameter Sweep: MKL ({task_name})")
        # Include Uniform as baseline if available
        sweep_runs = [task_results[n] for n in mkl_names]
        sweep_labels = list(mkl_names)
        if res_uni:
            sweep_runs.insert(0, res_uni)
            sweep_labels.insert(0, "Baseline (Uniform)")
        plot_results_custom(sweep_runs, sweep_labels, title_prefix=f"{task_name} - MKL Sensitivity")
    # B. RHO Sweep
    rho_names = sorted([n for n in all_names if "RHO" in n])
    if rho_names:
        print(f"\n>>> Parameter Sweep: RHO ({task_name})")
        sweep_runs = [task_results[n] for n in rho_names]
        sweep_labels = list(rho_names)
        if res_uni:
            sweep_runs.insert(0, res_uni)
            sweep_labels.insert(0, "Baseline (Uniform)")
        plot_results_custom(sweep_runs, sweep_labels, title_prefix=f"{task_name} - RHO Sensitivity")
--- Generating Detailed Analysis for HARD MODE --- ################################################################################ # ANALYSIS FOR: Task 1: CIFAR-100 (40% Label Noise) ################################################################################ >>> Full Summary Table: Task 1: CIFAR-100 (40% Label Noise)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 46.06 | 97 | 45.35 | 26.68 |
| MKL (k=2.0) | 41.86 | 99 | 41.58 | 23.94 |
| MKL (k=1.5) | 48.63 | 99 | 48.60 | 29.02 |
| MKL (k=1.25) | 47.69 | 97 | 45.71 | 26.74 |
| RHO (sel 20%) | 39.37 | 99 | 39.32 | 20.50 |
| RHO (sel 30%) | 47.66 | 97 | 46.70 | 27.08 |
| RHO (sel 40%) | 48.90 | 100 | 48.90 | 28.39 |
>>> Comparative Plot: Best Performers (Task 1: CIFAR-100 (40% Label Noise))
>>> Parameter Sweep: MKL (Task 1: CIFAR-100 (40% Label Noise))
>>> Parameter Sweep: RHO (Task 1: CIFAR-100 (40% Label Noise))
################################################################################ # ANALYSIS FOR: Task 2: MNIST (Input Noise std=1.5) ################################################################################ >>> Full Summary Table: Task 2: MNIST (Input Noise std=1.5)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 99.18 | 71 | 99.03 | 98.98 |
| MKL (k=2.0) | 83.18 | 61 | 81.99 | 77.67 |
| MKL (k=1.5) | 93.64 | 83 | 92.53 | 86.23 |
| MKL (k=1.25) | 96.08 | 64 | 95.28 | 91.16 |
| RHO (sel 20%) | 95.94 | 92 | 94.94 | 92.36 |
| RHO (sel 30%) | 96.70 | 52 | 94.66 | 93.06 |
| RHO (sel 40%) | 96.95 | 92 | 95.78 | 92.79 |
>>> Comparative Plot: Best Performers (Task 2: MNIST (Input Noise std=1.5))
>>> Parameter Sweep: MKL (Task 2: MNIST (Input Noise std=1.5))
>>> Parameter Sweep: RHO (Task 2: MNIST (Input Noise std=1.5))
################################################################################ # ANALYSIS FOR: Task 3: CLOUD (Input Noise std=1.5) ################################################################################ >>> Full Summary Table: Task 3: CLOUD (Input Noise std=1.5)
| Algorithm | Best Val Acc (%) | Epoch | Final Val Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 57.03 | 23 | 14.32 | 98.96 |
| MKL (k=2.0) | 55.73 | 70 | 24.48 | 58.13 |
| MKL (k=1.5) | 63.28 | 85 | 36.98 | 73.75 |
| MKL (k=1.25) | 68.23 | 100 | 68.23 | 78.96 |
| RHO (sel 20%) | 23.44 | 1 | 14.32 | 26.88 |
| RHO (sel 30%) | 43.49 | 74 | 37.76 | 54.37 |
| RHO (sel 40%) | 54.17 | 82 | 39.32 | 80.62 |
>>> Comparative Plot: Best Performers (Task 3: CLOUD (Input Noise std=1.5))
>>> Parameter Sweep: MKL (Task 3: CLOUD (Input Noise std=1.5))
>>> Parameter Sweep: RHO (Task 3: CLOUD (Input Noise std=1.5))
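As a quick illustration of find_best_run's behavior, the toy input below (invented numbers) shows the case-insensitive prefix match and the max-accuracy selection:
# Hypothetical toy input with invented val_acc histories.
toy_task = {
    'MKL (k=1.5)': {'val_acc': [0.10, 0.48]},
    'MKL (k=2.0)': {'val_acc': [0.12, 0.41]},
    'Uniform SGD': {'val_acc': [0.20, 0.30]},
}
name, res = find_best_run(toy_task, "MKL")
print(name)  # "MKL (k=1.5) (Best)" -- its peak val_acc (0.48) is the highest MKL run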
# --- 13c. Run Analysis (Renamed for Clarity) ---
from IPython.display import display

def create_summary_table_renamed(all_results, algorithm_names):
    """
    Creates a summary table with explicit 'Test Accuracy' labels.
    """
    summary_data = {
        'Algorithm': [],
        'Best Test Acc (%)': [],
        'Epoch': [],
        'Final Test Acc (%)': [],
        'Final Train Acc (%)': []
    }
    for results, name in zip(all_results, algorithm_names):
        if not results.get('val_acc'):
            continue
        # "val_acc" in the code IS the Test Accuracy (evaluated on test_loader)
        best_test_acc = max(results['val_acc']) * 100
        best_epoch = np.argmax(results['val_acc']) + 1
        final_test_acc = results['val_acc'][-1] * 100
        final_train_acc = results['train_acc'][-1] * 100
        summary_data['Algorithm'].append(name)
        summary_data['Best Test Acc (%)'].append(best_test_acc)
        summary_data['Epoch'].append(best_epoch)
        summary_data['Final Test Acc (%)'].append(final_test_acc)
        summary_data['Final Train Acc (%)'].append(final_train_acc)
    df = pd.DataFrame(summary_data)
    if df.empty:
        return None
    return df.set_index('Algorithm').style.format({
        'Best Test Acc (%)': '{:.2f}',
        'Final Test Acc (%)': '{:.2f}',
        'Final Train Acc (%)': '{:.2f}'
    }).highlight_max(subset=['Best Test Acc (%)'], color='lightgreen')

print("--- Generating Analysis (Labels Corrected to 'Test Acc') ---")
for task_name, task_results in all_experiment_results.items():
    print("\n" + "#"*80)
    print(f"# ANALYSIS FOR: {task_name}")
    print("#"*80)
    all_names = list(task_results.keys())
    all_results_list = list(task_results.values())
    # 1. Summary Table
    print(f"\n>>> Full Summary Table: {task_name}")
    summary_df = create_summary_table_renamed(all_results_list, all_names)
    if summary_df is not None:
        display(summary_df)
    # 2. Plot: Best Performers
    print(f"\n>>> Comparative Plot: Best Performers ({task_name})")
    name_uni, res_uni = find_best_run(task_results, "Uniform")
    name_mkl, res_mkl = find_best_run(task_results, "MKL")
    name_rho, res_rho = find_best_run(task_results, "RHO")
    # Keep names and results paired so labels never misalign when a run is missing
    pairs = [(n, r) for n, r in [(name_uni, res_uni), (name_mkl, res_mkl), (name_rho, res_rho)] if r]
    if pairs:
        best_names, best_runs = zip(*pairs)
        plot_results_custom(list(best_runs), list(best_names), title_prefix=f"{task_name} - Best vs Best")
--- Generating Analysis (Labels Corrected to 'Test Acc') --- ################################################################################ # ANALYSIS FOR: Task 1: CIFAR-100 (40% Label Noise) ################################################################################ >>> Full Summary Table: Task 1: CIFAR-100 (40% Label Noise)
| Algorithm | Best Test Acc (%) | Epoch | Final Test Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 46.06 | 97 | 45.35 | 26.68 |
| MKL (k=2.0) | 41.86 | 99 | 41.58 | 23.94 |
| MKL (k=1.5) | 48.63 | 99 | 48.60 | 29.02 |
| MKL (k=1.25) | 47.69 | 97 | 45.71 | 26.74 |
| RHO (sel 20%) | 39.37 | 99 | 39.32 | 20.50 |
| RHO (sel 30%) | 47.66 | 97 | 46.70 | 27.08 |
| RHO (sel 40%) | 48.90 | 100 | 48.90 | 28.39 |
>>> Comparative Plot: Best Performers (Task 1: CIFAR-100 (40% Label Noise))
################################################################################ # ANALYSIS FOR: Task 2: MNIST (Input Noise std=1.5) ################################################################################ >>> Full Summary Table: Task 2: MNIST (Input Noise std=1.5)
| Algorithm | Best Test Acc (%) | Epoch | Final Test Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 99.18 | 71 | 99.03 | 98.98 |
| MKL (k=2.0) | 83.18 | 61 | 81.99 | 77.67 |
| MKL (k=1.5) | 93.64 | 83 | 92.53 | 86.23 |
| MKL (k=1.25) | 96.08 | 64 | 95.28 | 91.16 |
| RHO (sel 20%) | 95.94 | 92 | 94.94 | 92.36 |
| RHO (sel 30%) | 96.70 | 52 | 94.66 | 93.06 |
| RHO (sel 40%) | 96.95 | 92 | 95.78 | 92.79 |
>>> Comparative Plot: Best Performers (Task 2: MNIST (Input Noise std=1.5))
################################################################################ # ANALYSIS FOR: Task 3: CLOUD (Input Noise std=1.5) ################################################################################ >>> Full Summary Table: Task 3: CLOUD (Input Noise std=1.5)
| Algorithm | Best Test Acc (%) | Epoch | Final Test Acc (%) | Final Train Acc (%) |
|---|---|---|---|---|
| Uniform SGD | 57.03 | 23 | 14.32 | 98.96 |
| MKL (k=2.0) | 55.73 | 70 | 24.48 | 58.13 |
| MKL (k=1.5) | 63.28 | 85 | 36.98 | 73.75 |
| MKL (k=1.25) | 68.23 | 100 | 68.23 | 78.96 |
| RHO (sel 20%) | 23.44 | 1 | 14.32 | 26.88 |
| RHO (sel 30%) | 43.49 | 74 | 37.76 | 54.37 |
| RHO (sel 40%) | 54.17 | 82 | 39.32 | 80.62 |
>>> Comparative Plot: Best Performers (Task 3: CLOUD (Input Noise std=1.5))
# --- 14. MNIST Signal-to-Noise Calculation ---
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1. Load clean MNIST data (the test set is enough for this calculation).
# We use the standard normalization (mean 0.1307, std 0.3081)
# because that's what the model actually sees.
transform_clean = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
# Download just the test set for a quick calculation
mnist_calc_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform_clean)
loader = DataLoader(mnist_calc_data, batch_size=1000, shuffle=False)

# 2. Calculate "signal power" (E[x^2])
# by averaging the squared pixel values across the dataset
print("--- Calculating MNIST Signal Power ---")
squared_sum = 0.0
total_pixels = 0
for data, _ in loader:
    # data has shape [batch, 1, 28, 28]; square every pixel value
    squared_sum += (data ** 2).sum().item()
    total_pixels += data.numel()
signal_power = squared_sum / total_pixels  # E[x^2] of the signal
print(f"Signal Power (E[x^2] of normalized MNIST): {signal_power:.4f}")

# 3. Compare with the noise power for different std values
print("\n--- Comparison with Proposed Noise Levels ---")
std_values = [0.5, 1.0, 1.5, 2.0, 2.5]
for std in std_values:
    noise_power = std ** 2  # E[epsilon^2] = variance for zero-mean noise
    # Signal-to-noise ratio (SNR); if SNR < 1, the noise is stronger than the signal
    snr = signal_power / noise_power
    print(f"std = {std:.1f} | Noise Power: {noise_power:.2f} | SNR: {snr:.4f}")
    if snr < 0.2:
        print(" -> CRITICAL: Noise is 5x stronger than signal. (X^T X)^-1 approx 0.")
    elif snr < 0.5:
        print(" -> HARD: Noise is 2x stronger than signal. Good for robust testing.")
    elif snr < 1.0:
        print(" -> MEDIUM: Noise is stronger than signal, but learnable.")
    else:
        print(" -> EASY: Signal is stronger than noise (this explains the 99% result).")
--- Calculating MNIST Signal Power --- Signal Power (E[x^2] of normalized MNIST): 1.0155 --- Comparison with Proposed Noise Levels --- std = 0.5 | Noise Power: 0.25 | SNR: 4.0622 -> EASY: Signal is stronger than noise (This explains your 99% result). std = 1.0 | Noise Power: 1.00 | SNR: 1.0155 -> EASY: Signal is stronger than noise (This explains your 99% result). std = 1.5 | Noise Power: 2.25 | SNR: 0.4514 -> HARD: Noise is 2x stronger than signal. Good for robust testing. std = 2.0 | Noise Power: 4.00 | SNR: 0.2539 -> HARD: Noise is 2x stronger than signal. Good for robust testing. std = 2.5 | Noise Power: 6.25 | SNR: 0.1625 -> CRITICAL: Noise is 5x stronger than signal. (X^T X)^-1 approx 0.
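The measured signal power (about 1.0155) agrees with the closed-form expectation: standardizing with the dataset mean and std leaves the pixels approximately zero-mean with unit variance, so E[x^2] = Var[x] + (E[x])^2 is roughly 1, and hence SNR is roughly 1/std^2. A quick data-free sanity check:
# Sanity check: with E[x^2] ~= 1 after standardization, SNR ~= 1 / std^2.
for std in [0.5, 1.0, 1.5, 2.0, 2.5]:
    print(f"std = {std:.1f} | analytic SNR ~= {1 / std**2:.4f}")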
# --- CLEANUP: Delete ALL Task 3 Experiment Checkpoints ---
import os
import glob

# Define path (same as the other snippets)
try:
    import google.colab
    BASE_PATH = '/content/drive/MyDrive'
except ImportError:
    BASE_PATH = '.'
checkpoint_dir = os.path.join(BASE_PATH, 'ThesisCheckpoints_v2')

# Pattern to find all Task 3 experiment files.
# This matches 'task_3_Uniform_SGD.pth', 'task_3_MKL...', etc.
files_to_delete = glob.glob(os.path.join(checkpoint_dir, "task_3_*.pth"))
print(f"Checking {checkpoint_dir}...")
print(f"Found {len(files_to_delete)} old Task 3 experiment checkpoints.")
for f in files_to_delete:
    try:
        os.remove(f)
        print(f"Deleted: {os.path.basename(f)}")
    except OSError as e:
        print(f"Error deleting {f}: {e}")
print("--- Task 3 Cleanup Complete. Safe to run Snippet 11b. ---")
Checking ./ThesisCheckpoints_v2... Found 14 old Task 3 experiment checkpoints. Deleted: task_3_MKL_k=1_25.pth Deleted: task_3_Uniform_SGD_best.pth Deleted: task_3_MKL_k=1_5_best.pth Deleted: task_3_MKL_k=2_0_best.pth Deleted: task_3_RHO_sel_40.pth Deleted: task_3_MKL_k=1_5.pth Deleted: task_3_MKL_k=2_0.pth Deleted: task_3_RHO_sel_30.pth Deleted: task_3_RHO_sel_20.pth Deleted: task_3_RHO_sel_20_best.pth Deleted: task_3_MKL_k=1_25_best.pth Deleted: task_3_RHO_sel_40_best.pth Deleted: task_3_RHO_sel_30_best.pth Deleted: task_3_Uniform_SGD.pth --- Task 3 Cleanup Complete. Safe to run Snippet 11b. ---